ENH: kernels for random.vonmisses; part 2 #681

Draft
samir-nasibli wants to merge 17 commits into master from samir-nasibli/enh/vonmisses_random

Commits (17)
63eeab1  ENH: kernels for random.vonmisses  (samir-nasibli, Apr 14, 2021)
5e6086c  update  (samir-nasibli, Apr 14, 2021)
4268517  refactoring  (samir-nasibli, Apr 14, 2021)
6f77dc0  Merge branch 'master' into samir-nasibli/enh/vonmisses_random  (samir-nasibli, Apr 21, 2021)
b5f539a  Merge branch 'master' into samir-nasibli/enh/vonmisses_random  (samir-nasibli, Apr 23, 2021)
e9c17c7  disabled tests on CPU  (samir-nasibli, Apr 23, 2021)
75dc985  Merge branch 'master' into samir-nasibli/enh/vonmisses_random  (samir-nasibli, May 4, 2021)
7221851  Merge branch 'master' into samir-nasibli/enh/vonmisses_random  (samir-nasibli, May 24, 2021)
df3160f  tmp solution  (samir-nasibli, May 26, 2021)
0e743d2  revert last changes on dpnp_krnl_random.cpp  (samir-nasibli, May 26, 2021)
ff8de8e  Merge branch 'master' into samir-nasibli/enh/vonmisses_random  (samir-nasibli, Jul 12, 2021)
1492555  Merge branch 'master' into samir-nasibli/enh/vonmisses_random  (samir-nasibli, Jul 13, 2021)
233cd59  Merge branch 'master' into samir-nasibli/enh/vonmisses_random  (samir-nasibli, Aug 19, 2021)
30637c6  Merge branch 'master' into samir-nasibli/enh/vonmisses_random  (shssf, Aug 23, 2021)
3222bc5  Merge branch 'master' into samir-nasibli/enh/vonmisses_random  (Alexander-Makaryev, Sep 29, 2021)
2c3eeb2  Merge branch 'master' into samir-nasibli/enh/vonmisses_random  (densmirn, Oct 7, 2021)
b2b3c42  Plukiche/vonmisses random (#998)  (LukichevaPolina, Oct 7, 2021)
dpnp/backend/kernels/dpnp_krnl_random.cpp (135 changes: 80 additions & 55 deletions)
@@ -1261,6 +1261,8 @@ void dpnp_rng_vonmises_large_kappa_c(void* result, const _DataType mu, const _Da
_DataType s_minus_one, hpt, r_over_two_kappa_minus_one, rho_minus_one;
_DataType* Uvec = nullptr;
_DataType* Vvec = nullptr;
bool* result_ready = nullptr;
bool* result_mask = nullptr;
const _DataType d_zero = 0.0, d_one = 1.0;

assert(kappa > 1.0);
@@ -1276,49 +1278,59 @@

Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));
Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));

result_ready = reinterpret_cast<bool*>(dpnp_memory_alloc_c(1 * sizeof(bool)));
result_ready[0] = false;
result_mask = reinterpret_cast<bool*>(dpnp_memory_alloc_c(size * sizeof(bool)));
dpnp_full_c<bool>(result_ready, result_mask, size);

for (size_t n = 0; n < size;)
while(!result_ready[0])
{
size_t diff_size = size - n;
mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, 0.5 * M_PI);
auto event_out = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec);
event_out.wait();
// TODO
// use deps case
auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec);
mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one);
event_out = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec);
event_out.wait();
auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, size, Vvec);

// TODO
// kernel
for (size_t i = 0; i < diff_size; i++)
{
_DataType sn, cn, sn2, cn2;
_DataType neg_W_minus_one, V, Y;
cl::sycl::range<1> gws(size);
auto paral_kernel_some = [&](cl::sycl::handler& cgh) {
Comment by @shssf (Contributor), May 13, 2021 (edited):
Kernel inside the loop with a bigger trip count. It would be more efficient to parallelize (make the kernel) over the bigger value size instead of size - n; that will require a loop inside the kernel.
It is questionable which will be more performant:

  1. a loop with a queue of kernels (data dependent)
  2. a kernel with a loop

It is hard to predict without performance measurements, but I would vote that parallelization with the bigger number of threads should be better.
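
For reference, below is a minimal sketch of option 2: a single kernel submission where each work item runs the rejection loop for its own element until a sample is accepted. It assumes result points to USM memory reachable from the device, and it substitutes a toy counter-based generator for the per-work-item random draws; the function and helper names are hypothetical and not part of this PR.

#include <CL/sycl.hpp>
#include <cmath>
#include <cstddef>
#include <cstdint>

// Toy counter-based uniform generator in [0, 1); a hypothetical stand-in for a
// real device-side RNG, used here only to keep the sketch self-contained.
inline double toy_uniform(std::uint64_t& state)
{
    state = state * 6364136223846793005ULL + 1442695040888963407ULL;
    return static_cast<double>(state >> 11) / 9007199254740992.0; // top 53 bits / 2^53
}

// Sketch of the small-kappa sampler as one kernel with an internal retry loop.
void vonmises_small_kappa_sketch(cl::sycl::queue& q, double* result,
                                 const double kappa, const double s_kappa,
                                 const std::size_t size)
{
    auto event = q.submit([&](cl::sycl::handler& cgh) {
        cgh.parallel_for(cl::sycl::range<1>(size), [=](cl::sycl::id<1> global_id) {
            const std::size_t i = global_id[0];
            std::uint64_t state = 0x9E3779B97F4A7C15ULL ^ (i + 1);
            for (;;)
            {
                const double u = toy_uniform(state) * M_PI; // U ~ Uniform(0, pi)
                const double v = toy_uniform(state);        // V ~ Uniform(0, 1)
                const double z = cl::sycl::cos(u);
                const double w = (kappa + s_kappa * z) / (s_kappa + kappa * z);
                const double y = s_kappa - kappa * w;
                if ((y * (2.0 - y) >= v) || (cl::sycl::log(y / v) + 1.0 >= y))
                {
                    result[i] = cl::sycl::acos(w); // accepted: store and stop retrying
                    break;
                }
            }
        });
    });
    event.wait();
}

Whether this actually beats the resubmission loop in the diff would still need measuring: every element is written exactly once, but work items with unlucky draws keep retrying and can hold back the rest of their sub-group.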

cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event});
cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) {
size_t i = global_id[0];
if (!result_mask[i]) {
_DataType sn, cn, sn2, cn2;
_DataType neg_W_minus_one, V, Y;

sn = sin(Uvec[i]);
cn = cos(Uvec[i]);
V = Vvec[i];
sn2 = sn * sn;
cn2 = cn * cn;
sn = cl::sycl::sin(Uvec[i]);
cn = cl::sycl::cos(Uvec[i]);
V = Vvec[i];
sn2 = sn * sn;
cn2 = cn * cn;

neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2);
Y = kappa * (s_minus_one + neg_W_minus_one);
neg_W_minus_one = s_minus_one * sn2 / (0.5 * s_minus_one + cn2);
Y = kappa * (s_minus_one + neg_W_minus_one);

if ((Y * (2 - Y) >= V) || (log(Y / V) + 1 >= Y))
{
Y = neg_W_minus_one * (2 - neg_W_minus_one);
if (Y < 0)
Y = 0.0;
else if (Y > 1.0)
Y = 1.0;
if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y))
{
Y = neg_W_minus_one * (2 - neg_W_minus_one);
if (Y < 0)
Y = 0.0;
else if (Y > 1.0)
Y = 1.0;

result1[i] = cl::sycl::asin(cl::sycl::sqrt(Y));
result_mask[i] = true;
}
}
});
};
auto some_event = DPNP_QUEUE.submit(paral_kernel_some);
some_event.wait();

result1[n++] = asin(sqrt(Y));
}
}
dpnp_all_c<bool, bool>(result_mask, result_ready, size);
}

dpnp_memory_free_c(Uvec);
dpnp_memory_free_c(result_ready);
dpnp_memory_free_c(result_mask);

mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one);
auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec);
@@ -1357,6 +1369,8 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da
_DataType rho_over_kappa, rho, r, s_kappa;
_DataType* Uvec = nullptr;
_DataType* Vvec = nullptr;
bool* result_ready = nullptr;
bool* result_mask = nullptr;

const _DataType d_zero = 0.0, d_one = 1.0;

@@ -1372,35 +1386,46 @@ void dpnp_rng_vonmises_small_kappa_c(void* result, const _DataType mu, const _Da
Uvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));
Vvec = reinterpret_cast<_DataType*>(dpnp_memory_alloc_c(size * sizeof(_DataType)));

for (size_t n = 0; n < size;)
result_ready = reinterpret_cast<bool*>(dpnp_memory_alloc_c(1 * sizeof(bool)));
result_ready[0] = false;
result_mask = reinterpret_cast<bool*>(dpnp_memory_alloc_c(size * sizeof(bool)));
dpnp_full_c<bool>(result_ready, result_mask, size);

while (!result_ready[0])
{
size_t diff_size = size - n;
mkl_rng::uniform<_DataType> uniform_distribution_u(d_zero, M_PI);
auto event_out = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, diff_size, Uvec);
event_out.wait();
// TODO
// use deps case
auto uniform_distr_u_event = mkl_rng::generate(uniform_distribution_u, DPNP_RNG_ENGINE, size, Uvec);
mkl_rng::uniform<_DataType> uniform_distribution_v(d_zero, d_one);
event_out = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, diff_size, Vvec);
event_out.wait();
auto uniform_distr_v_event = mkl_rng::generate(uniform_distribution_v, DPNP_RNG_ENGINE, size, Vvec);

// TODO
// kernel
for (size_t i = 0; i < diff_size; i++)
{
_DataType Z, W, Y, V;
Z = cos(Uvec[i]);
V = Vvec[i];
W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z);
Y = s_kappa - kappa * W;
if ((Y * (2 - Y) >= V) || (log(Y / V) + 1 >= Y))
{
result1[n++] = acos(W);
}
}
}
cl::sycl::range<1> gws((size));

auto paral_kernel_some = [&](cl::sycl::handler& cgh) {
cgh.depends_on({uniform_distr_u_event, uniform_distr_v_event});
cgh.parallel_for(gws, [=](cl::sycl::id<1> global_id) {
size_t i = global_id[0];
if (!result_mask[i]) {
_DataType Z, W, Y, V;
Z = cl::sycl::cos(Uvec[i]);
V = Vvec[i];
W = (kappa + s_kappa * Z) / (s_kappa + kappa * Z);
Y = s_kappa - kappa * W;
if ((Y * (2 - Y) >= V) || (cl::sycl::log(Y / V) + 1 >= Y))
{
result1[i] = cl::sycl::acos(W);
result_mask[i] = true;
}
}
});
};
auto some_event = DPNP_QUEUE.submit(paral_kernel_some);
some_event.wait();

dpnp_all_c<bool, bool>(result_mask, result_ready, size);
}
dpnp_memory_free_c(Uvec);
dpnp_memory_free_c(result_ready);
dpnp_memory_free_c(result_mask);

mkl_rng::uniform<_DataType> uniform_distribution(d_zero, d_one);
auto uniform_distr_event = mkl_rng::generate(uniform_distribution, DPNP_RNG_ENGINE, size, Vvec);
tests/test_random.py (2 changes: 1 addition & 1 deletion)
@@ -875,7 +875,7 @@ def test_invalid_args(self):
@pytest.mark.parametrize("kappa", [5.0, 0.5], ids=['large_kappa', 'small_kappa'])
def test_seed(self, kappa):
seed = 28041990
size = 10
size = 1000
mu = 2.
dpnp.random.seed(seed)
a1 = dpnp.asarray(dpnp.random.vonmises(mu, kappa, size=size))