@@ -475,54 +475,72 @@ void dpnp_rng_logistic_c(void* result, const double loc, const double scale, con
475475template <typename _DataType>
476476void dpnp_rng_lognormal_c (void * result, const _DataType mean, const _DataType stddev, const size_t size)
477477{
478- if (!size)
478+ if (!size || !result )
479479 {
480480 return ;
481481 }
482482 _DataType* result1 = reinterpret_cast <_DataType*>(result);
483483
484- const _DataType displacement = _DataType (0.0 );
485-
486- const _DataType scalefactor = _DataType (1.0 );
484+ if (stddev == 0.0 )
485+ {
486+ _DataType* fill_value = reinterpret_cast <_DataType*>(dpnp_memory_alloc_c (sizeof (_DataType)));
487+ fill_value[0 ] = static_cast <_DataType>(std::exp (mean + (stddev * stddev) / 2 ));
488+ dpnp_initval_c<_DataType>(result, fill_value, size);
489+ dpnp_memory_free_c (fill_value);
490+ }
491+ else
492+ {
493+ const _DataType displacement = _DataType (0.0 );
494+ const _DataType scalefactor = _DataType (1.0 );
487495
488- mkl_rng::lognormal<_DataType> distribution (mean, stddev, displacement, scalefactor);
489- // perform generation
490- auto event_out = mkl_rng::generate (distribution, DPNP_RNG_ENGINE, size, result1);
491- event_out.wait ();
496+ mkl_rng::lognormal<_DataType> distribution (mean, stddev, displacement, scalefactor);
497+ auto event_out = mkl_rng::generate (distribution, DPNP_RNG_ENGINE, size, result1);
498+ event_out.wait ();
499+ }
500+ return ;
492501}
493502
494503template <typename _DataType>
495504void dpnp_rng_multinomial_c (
496505 void * result, const int ntrial, const double * p_vector, const size_t p_vector_size, const size_t size)
497506{
498- if (!size)
507+ if (!size || !result )
499508 {
500509 return ;
501510 }
502- std::int32_t * result1 = reinterpret_cast <std::int32_t *>(result);
503- std::vector<double > p (p_vector, p_vector + p_vector_size);
504- // size = size
505- // `result` is a array for random numbers
506- // `size` is a `result`'s len. `size = n * p.size()`
507- // `n` is a number of random values to be generated.
508- size_t n = size / p.size ();
509511
510- if (dpnp_queue_is_cpu_c () )
512+ if (ntrial == 0 )
511513 {
512- mkl_rng::multinomial<std::int32_t > distribution (ntrial, p);
513- // perform generation
514- auto event_out = mkl_rng::generate (distribution, DPNP_RNG_ENGINE, n, result1);
515- event_out.wait ();
514+ dpnp_zeros_c<_DataType>(result, size);
516515 }
517516 else
518517 {
519- int errcode = viRngMultinomial (
520- VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON, get_rng_stream (), n, result1, ntrial, p_vector_size, p_vector);
521- if (errcode != VSL_STATUS_OK)
518+ std::int32_t * result1 = reinterpret_cast <std::int32_t *>(result);
519+ std::vector<double > p (p_vector, p_vector + p_vector_size);
520+ // size = size
521+ // `result` is a array for random numbers
522+ // `size` is a `result`'s len. `size = n * p.size()`
523+ // `n` is a number of random values to be generated.
524+ size_t n = size / p.size ();
525+
526+ if (dpnp_queue_is_cpu_c ())
522527 {
523- throw std::runtime_error (" DPNP RNG Error: dpnp_rng_multinomial_c() failed." );
528+ mkl_rng::multinomial<std::int32_t > distribution (ntrial, p);
529+ // perform generation
530+ auto event_out = mkl_rng::generate (distribution, DPNP_RNG_ENGINE, n, result1);
531+ event_out.wait ();
532+ }
533+ else
534+ {
535+ int errcode = viRngMultinomial (
536+ VSL_RNG_METHOD_MULTINOMIAL_MULTPOISSON, get_rng_stream (), n, result1, ntrial, p_vector_size, p_vector);
537+ if (errcode != VSL_STATUS_OK)
538+ {
539+ throw std::runtime_error (" DPNP RNG Error: dpnp_rng_multinomial_c() failed." );
540+ }
524541 }
525542 }
543+ return ;
526544}
527545
528546template <typename _DataType>
@@ -946,17 +964,20 @@ template <typename _DataType>
946964void dpnp_rng_shuffle_c (
947965 void * result, const size_t itemsize, const size_t ndim, const size_t high_dim_size, const size_t size)
948966{
949- if (!(size) || !(high_dim_size > 1 ) )
967+ if (!result )
950968 {
951969 return ;
952970 }
953971
954- char * result1 = reinterpret_cast <char *>(result);
972+ if (!size || !ndim || !(high_dim_size > 1 ))
973+ {
974+ return ;
975+ }
955976
956- double * Uvec = nullptr ;
977+ char * result1 = reinterpret_cast < char *>(result) ;
957978
958979 size_t uvec_size = high_dim_size - 1 ;
959- Uvec = reinterpret_cast <double *>(dpnp_memory_alloc_c (uvec_size * sizeof (double )));
980+ double * Uvec = reinterpret_cast <double *>(dpnp_memory_alloc_c (uvec_size * sizeof (double )));
960981 mkl_rng::uniform<double > uniform_distribution (0.0 , 1.0 );
961982 auto uniform_event = mkl_rng::generate (uniform_distribution, DPNP_RNG_ENGINE, uvec_size, Uvec);
962983 uniform_event.wait ();
@@ -966,42 +987,52 @@ void dpnp_rng_shuffle_c(
966987 // Fast, statically typed path: shuffle the underlying buffer.
967988 // Only for non-empty, 1d objects of class ndarray (subclasses such
968989 // as MaskedArrays may not support this approach).
969- // TODO
970- // kernel
971- char * buf = nullptr ;
972- buf = reinterpret_cast <char *>(dpnp_memory_alloc_c (itemsize * sizeof (char )));
990+ char * buf = reinterpret_cast <char *>(dpnp_memory_alloc_c (itemsize * sizeof (char )));
973991 for (size_t i = uvec_size; i > 0 ; i--)
974992 {
975993 size_t j = (size_t )(floor ((i + 1 ) * Uvec[i - 1 ]));
976- memcpy (buf, result1 + j * itemsize, itemsize);
977- memcpy (result1 + j * itemsize, result1 + i * itemsize, itemsize);
978- memcpy (result1 + i * itemsize, buf, itemsize);
994+ if (i != j)
995+ {
996+ auto memcpy1 =
997+ DPNP_QUEUE.submit ([&](cl::sycl::handler& h) { h.memcpy (buf, result1 + j * itemsize, itemsize); });
998+ auto memcpy2 = DPNP_QUEUE.submit ([&](cl::sycl::handler& h) {
999+ h.depends_on ({memcpy1});
1000+ h.memcpy (result1 + j * itemsize, result1 + i * itemsize, itemsize);
1001+ });
1002+ auto memcpy3 = DPNP_QUEUE.submit ([&](cl::sycl::handler& h) {
1003+ h.depends_on ({memcpy2});
1004+ h.memcpy (result1 + i * itemsize, buf, itemsize);
1005+ });
1006+ memcpy3.wait ();
1007+ }
9791008 }
980-
9811009 dpnp_memory_free_c (buf);
9821010 }
9831011 else
9841012 {
9851013 // Multidimensional ndarrays require a bounce buffer.
986- // TODO
987- // kernel
988- char * buf = nullptr ;
9891014 size_t step_size = (size / high_dim_size) * itemsize; // size in bytes for x[i] element
990- buf = reinterpret_cast <char *>(dpnp_memory_alloc_c (step_size * sizeof (char )));
1015+ char * buf = reinterpret_cast <char *>(dpnp_memory_alloc_c (step_size * sizeof (char )));
9911016 for (size_t i = uvec_size; i > 0 ; i--)
9921017 {
9931018 size_t j = (size_t )(floor ((i + 1 ) * Uvec[i - 1 ]));
9941019 if (j < i)
9951020 {
996- memcpy (buf, result1 + j * step_size, step_size);
997- memcpy (result1 + j * step_size, result1 + i * step_size, step_size);
998- memcpy (result1 + i * step_size, buf, step_size);
1021+ auto memcpy1 =
1022+ DPNP_QUEUE.submit ([&](cl::sycl::handler& h) { h.memcpy (buf, result1 + j * step_size, step_size); });
1023+ auto memcpy2 = DPNP_QUEUE.submit ([&](cl::sycl::handler& h) {
1024+ h.depends_on ({memcpy1});
1025+ h.memcpy (result1 + j * step_size, result1 + i * step_size, step_size);
1026+ });
1027+ auto memcpy3 = DPNP_QUEUE.submit ([&](cl::sycl::handler& h) {
1028+ h.depends_on ({memcpy2});
1029+ h.memcpy (result1 + i * step_size, buf, step_size);
1030+ });
1031+ memcpy3.wait ();
9991032 }
10001033 }
1001-
10021034 dpnp_memory_free_c (buf);
10031035 }
1004-
10051036 dpnp_memory_free_c (Uvec);
10061037}
10071038
0 commit comments