CuPBoP/examples/vecadd/vecadd-cuda-nvptx64-nvidia-...

# 1 "vecadd.cu"
# 1 "<built-in>" 1
# 1 "<built-in>" 3
# 745 "<built-in>" 3
# 1 "<command line>" 1
# 1 "<built-in>" 2
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 1 3
# 32 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_math_forward_declares.h" 1 3
# 26 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_math_forward_declares.h" 3
// Forward declarations of the device-side math overloads.
// Every entry is declared `static`, `always_inline` and `__device__` at global
// scope; only prototypes appear here, no function bodies.
// Most functions come as a double/float pair.  Exceptions visible in this list:
//   - abs also has long, long long and int overloads;
//   - pow has additional (double, int) and (float, int) overloads;
//   - labs / llabs exist only for long / long long;
//   - nan and nanf take a `const char *` and are separately named.
static __inline__ __attribute__((always_inline)) __attribute__((device)) long abs(long);
static __inline__ __attribute__((always_inline)) __attribute__((device)) long long abs(long long);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double abs(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float abs(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) int abs(int);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double acos(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float acos(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double acosh(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float acosh(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double asin(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float asin(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double asinh(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float asinh(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double atan2(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float atan2(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double atan(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float atan(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double atanh(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float atanh(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double cbrt(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float cbrt(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double ceil(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float ceil(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double copysign(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float copysign(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double cos(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float cos(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double cosh(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float cosh(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double erfc(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float erfc(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double erf(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float erf(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double exp2(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float exp2(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double exp(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float exp(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double expm1(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float expm1(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double fabs(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float fabs(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double fdim(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float fdim(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double floor(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float floor(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double fma(double, double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float fma(float, float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double fmax(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float fmax(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double fmin(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float fmin(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double fmod(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float fmod(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) int fpclassify(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) int fpclassify(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double frexp(double, int *);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float frexp(float, int *);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double hypot(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float hypot(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) int ilogb(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) int ilogb(float);


// Classification and comparison helpers are declared as returning bool.
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isfinite(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isfinite(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isgreater(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isgreaterequal(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isgreaterequal(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isgreater(float, float);


static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isinf(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isinf(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isless(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool islessequal(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool islessequal(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isless(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool islessgreater(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool islessgreater(float, float);


static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isnan(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isnan(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isnormal(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isnormal(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isunordered(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool isunordered(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) long labs(long);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double ldexp(double, int);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float ldexp(float, int);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double lgamma(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float lgamma(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) long long llabs(long long);
static __inline__ __attribute__((always_inline)) __attribute__((device)) long long llrint(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) long long llrint(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double log10(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float log10(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double log1p(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float log1p(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double log2(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float log2(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double logb(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float logb(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double log(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float log(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) long lrint(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) long lrint(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) long lround(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) long lround(float);
// Only the float overload of llround is declared in this list.
static __inline__ __attribute__((always_inline)) __attribute__((device)) long long llround(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double modf(double, double *);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float modf(float, float *);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double nan(const char *);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float nanf(const char *);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double nearbyint(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float nearbyint(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double nextafter(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float nextafter(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double pow(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double pow(double, int);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float pow(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float pow(float, int);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double remainder(double, double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float remainder(float, float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double remquo(double, double, int *);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float remquo(float, float, int *);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double rint(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float rint(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double round(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float round(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double scalbln(double, long);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float scalbln(float, long);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double scalbn(double, int);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float scalbn(float, int);


static __inline__ __attribute__((always_inline)) __attribute__((device)) bool signbit(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) bool signbit(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double sin(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float sin(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double sinh(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float sinh(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double sqrt(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float sqrt(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double tan(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float tan(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double tanh(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float tanh(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double tgamma(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float tgamma(float);
static __inline__ __attribute__((always_inline)) __attribute__((device)) double trunc(double);
static __inline__ __attribute__((always_inline)) __attribute__((device)) float trunc(float);
# 194 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_math_forward_declares.h" 3
// Make the global-scope math functions also reachable as std::<name>.
// Each using-declaration imports every overload of that name that has been
// declared in the global namespace at this point.
namespace std {


using ::abs;
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isinf;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnan;
using ::isnormal;
using ::isunordered;
using ::labs;
using ::ldexp;
using ::lgamma;
using ::llabs;
using ::llrint;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::llround;
using ::modf;
using ::nan;
using ::nanf;
using ::nearbyint;
using ::nextafter;
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;


}
# 33 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3


# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 1 3
# 40 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 3

# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/c++config.h" 1 3
# 296 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/c++config.h" 3
// Basic size/pointer types for namespace std plus the internal
// __terminate() helper.
namespace std
{
  // Unsigned type for sizes and signed type for pointer differences.
  using size_t = unsigned long;
  using ptrdiff_t = long;

  // Type of the `nullptr` literal.
  using nullptr_t = decltype(nullptr);

#pragma GCC visibility push(default)

  // Calls std::terminate().  The callee is declared locally inside the body,
  // so this helper does not depend on any other declaration.  Never returns.
  extern "C++" __attribute__ ((__noreturn__, __always_inline__))
  inline void
  __terminate() noexcept
  {
    void terminate() noexcept __attribute__ ((__noreturn__));
    terminate();
  }

#pragma GCC visibility pop
}
# 329 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/c++config.h" 3
// Declare an empty inline namespace __cxx11 inside std, carrying the
// "cxx11" ABI tag.
namespace std
{
  inline namespace __cxx11 __attribute__((__abi_tag__ ("cxx11"))) { }
}
// Same inline namespace and ABI tag inside __gnu_cxx.
namespace __gnu_cxx
{
  inline namespace __cxx11 __attribute__((__abi_tag__ ("cxx11"))) { }
}
# 508 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/c++config.h" 3
namespace std
{
#pragma GCC visibility push(default)

  // Thin wrapper over the compiler builtin: reports whether the call is being
  // evaluated as part of a constant expression.
  inline constexpr bool __is_constant_evaluated() noexcept
  { return __builtin_is_constant_evaluated(); }

#pragma GCC visibility pop
}
# 655 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/c++config.h" 3
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/os_defines.h" 1 3
# 39 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/os_defines.h" 3
# 1 "/usr/include/features.h" 1 3 4
# 392 "/usr/include/features.h" 3 4
# 1 "/usr/include/features-time64.h" 1 3 4
# 20 "/usr/include/features-time64.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 1 3 4
# 21 "/usr/include/features-time64.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/timesize.h" 1 3 4
# 19 "/usr/include/x86_64-linux-gnu/bits/timesize.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 1 3 4
# 20 "/usr/include/x86_64-linux-gnu/bits/timesize.h" 2 3 4
# 22 "/usr/include/features-time64.h" 2 3 4
# 393 "/usr/include/features.h" 2 3 4
# 464 "/usr/include/features.h" 3 4
# 1 "/usr/include/stdc-predef.h" 1 3 4
# 465 "/usr/include/features.h" 2 3 4
# 486 "/usr/include/features.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 1 3 4
# 559 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 1 3 4
# 560 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/long-double.h" 1 3 4
# 561 "/usr/include/x86_64-linux-gnu/sys/cdefs.h" 2 3 4
# 487 "/usr/include/features.h" 2 3 4
# 510 "/usr/include/features.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 1 3 4
# 10 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/gnu/stubs-64.h" 1 3 4
# 11 "/usr/include/x86_64-linux-gnu/gnu/stubs.h" 2 3 4
# 511 "/usr/include/features.h" 2 3 4
# 40 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/os_defines.h" 2 3
# 656 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/c++config.h" 2 3


# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/cpu_defines.h" 1 3
# 659 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/x86_64-linux-gnu/c++/12/bits/c++config.h" 2 3
# 42 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 2 3
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/cpp_type_traits.h" 1 3
# 36 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/cpp_type_traits.h" 3
# 67 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/cpp_type_traits.h" 3
extern "C++" {

namespace std __attribute__ ((__visibility__ ("default")))
{


  // Empty tag types standing for compile-time "true" and "false".
  struct __true_type { };
  struct __false_type { };

  // Maps a bool constant to a tag type: __false_type in the general case ...
  template<bool _Flag>
    struct __truth_type
    {
      using __type = __false_type;
    };

  // ... and __true_type when the constant is `true`.
  template<>
    struct __truth_type<true>
    {
      using __type = __true_type;
    };


  // Logical OR of two trait classes: __value is the OR of both operands'
  // __value members, and __type is the tag type selected by that result.
  template<typename _Sp, typename _Tp>
    struct __traitor
    {
      enum { __value = bool(_Sp::__value) || bool(_Tp::__value) };

      using __type = typename __truth_type<__value>::__type;
    };


  // Type-equality trait.  General case: the two types differ.
  template<class _Lhs, class _Rhs>
    struct __are_same
    {
      using __type = __false_type;
      enum { __value = 0 };
    };

  // Both arguments are the same type.
  template<class _Tp>
    struct __are_same<_Tp, _Tp>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };


  // Trait that is true only for `void`.  General case: false.
  template<class _Tp>
    struct __is_void
    {
      using __type = __false_type;
      enum { __value = 0 };
    };

  // Exact match on void.
  template<>
    struct __is_void<void>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };


  // Integer-type trait.  The primary template answers "no"; each explicit
  // specialization below answers "yes" for one built-in type.
  template<class _Tp>
    struct __is_integer
    {
      using __type = __false_type;
      enum { __value = 0 };
    };

  // bool
  template<>
    struct __is_integer<bool>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  // The three narrow character types.
  template<>
    struct __is_integer<char>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_integer<signed char>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_integer<unsigned char>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  // wchar_t
  template<>
    struct __is_integer<wchar_t>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };
# 184 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/cpp_type_traits.h" 3
  // Further explicit "yes" specializations of __is_integer.
  // Unicode character types.
  template<>
    struct __is_integer<char16_t>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_integer<char32_t>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  // short / unsigned short
  template<>
    struct __is_integer<short>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_integer<unsigned short>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  // int / unsigned int
  template<>
    struct __is_integer<int>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_integer<unsigned int>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  // long / unsigned long
  template<>
    struct __is_integer<long>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_integer<unsigned long>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  // long long / unsigned long long
  template<>
    struct __is_integer<long long>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_integer<unsigned long long>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };
# 289 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/cpp_type_traits.h" 3
  // Floating-point trait: false in general, true for float, double and
  // long double.
  template<class _Tp>
    struct __is_floating
    {
      using __type = __false_type;
      enum { __value = 0 };
    };

  template<>
    struct __is_floating<float>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_floating<double>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_floating<long double>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };


  // Pointer trait.  General case: not a pointer.
  template<class _Tp>
    struct __is_pointer
    {
      using __type = __false_type;
      enum { __value = 0 };
    };

  // Matches any `_Tp*`.
  template<class _Tp>
    struct __is_pointer<_Tp*>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };


  // Arithmetic = integer OR floating point (combined through __traitor).
  template<class _Tp>
    struct __is_arithmetic
      : public __traitor<__is_integer<_Tp>, __is_floating<_Tp> >
    {
    };

  // Scalar = arithmetic OR pointer (combined through __traitor).
  template<class _Tp>
    struct __is_scalar
      : public __traitor<__is_arithmetic<_Tp>, __is_pointer<_Tp> >
    {
    };


  // Character trait: false in general, true for char and wchar_t.
  template<class _Tp>
    struct __is_char
    {
      using __type = __false_type;
      enum { __value = 0 };
    };

  template<>
    struct __is_char<char>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_char<wchar_t>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };


  // Byte trait: false in general, true for char, signed char and
  // unsigned char.
  template<class _Tp>
    struct __is_byte
    {
      using __type = __false_type;
      enum { __value = 0 };
    };

  template<>
    struct __is_byte<char>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_byte<signed char>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };

  template<>
    struct __is_byte<unsigned char>
    {
      using __type = __true_type;
      enum { __value = 1 };
    };
# 425 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/cpp_type_traits.h" 3
  // Declaration only; no definition is given at this point.
  template<class> struct iterator_traits;

  // __value mirrors the __is_trivially_copyable builtin for the given type.
  template<class _Tp>
    struct __is_nonvolatile_trivially_copyable
    { enum { __value = __is_trivially_copyable(_Tp) }; };

  // Any volatile-qualified type is reported as 0 regardless of the builtin.
  template<class _Tp>
    struct __is_nonvolatile_trivially_copyable<volatile _Tp>
    { enum { __value = 0 }; };


  // Trait over an (output, input) iterator pair.  General case: 0.
  template<class _OutputIter, class _InputIter>
    struct __memcpyable
    { enum { __value = 0 }; };

  // Raw pointers to the same element type: defer to
  // __is_nonvolatile_trivially_copyable for that type.
  template<class _Tp>
    struct __memcpyable<_Tp*, _Tp*>
      : __is_nonvolatile_trivially_copyable<_Tp>
    {
    };

  // Same, with a pointer-to-const source.
  template<class _Tp>
    struct __memcpyable<_Tp*, const _Tp*>
      : __is_nonvolatile_trivially_copyable<_Tp>
    {
    };


  // Trait over a pair of iterators.  General case: 0.
  template<class _Iter1, class _Iter2>
    struct __memcmpable
    { enum { __value = 0 }; };

  // Raw pointers to the same element type (any mix of const on one side):
  // defer to __is_nonvolatile_trivially_copyable for that type.
  template<class _Tp>
    struct __memcmpable<_Tp*, _Tp*>
      : __is_nonvolatile_trivially_copyable<_Tp>
    {
    };

  template<class _Tp>
    struct __memcmpable<const _Tp*, _Tp*>
      : __is_nonvolatile_trivially_copyable<_Tp>
    {
    };

  template<class _Tp>
    struct __memcmpable<_Tp*, const _Tp*>
      : __is_nonvolatile_trivially_copyable<_Tp>
    {
    };


  // The second parameter defaults to __is_byte<_Tp>::__value.  When it is
  // true, __value records whether _Tp(-1) compares greater than _Tp(1).
  template<class _Tp, bool _TreatAsBytes = __is_byte<_Tp>::__value>
    struct __is_memcmp_ordered
    {
      static const bool __value = _Tp(-1) > _Tp(1);
    };

  // When the second parameter is false the answer is always false.
  template<class _Tp>
    struct __is_memcmp_ordered<_Tp, false>
    {
      static const bool __value = false;
    };


  // Two-type variant.  The defaulted third parameter is true when both types
  // have the same size; in that case both must satisfy __is_memcmp_ordered.
  template<class _Tp, class _Up, bool _SameSize = sizeof(_Tp) == sizeof(_Up)>
    struct __is_memcmp_ordered_with
    {
      static const bool __value =
        __is_memcmp_ordered<_Tp>::__value && __is_memcmp_ordered<_Up>::__value;
    };

  // Third parameter false: always false.
  template<class _Tp, class _Up>
    struct __is_memcmp_ordered_with<_Tp, _Up, false>
    {
      static const bool __value = false;
    };
# 550 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/cpp_type_traits.h" 3
  // Primary template of the move-iterator trait: answers "no".
  template<class _Tp>
    struct __is_move_iterator
    {
      using __type = __false_type;
      enum { __value = 0 };
    };


  // Generic overload: hands back the iterator it was given, unchanged.
  template<class _Iterator>
    inline _Iterator
    __miter_base(_Iterator __it)
    {
      return __it;
    }


}
}
# 43 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 2 3
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/ext/type_traits.h" 1 3
# 33 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/ext/type_traits.h" 3


extern "C++" {

namespace __gnu_cxx __attribute__ ((__visibility__ ("default")))
{


  // Conditional member typedef: the primary template has no members ...
  template<bool _Cond, class _Tp>
    struct __enable_if
    {
    };

  // ... and only the `true` case exposes `__type`.
  template<class _Tp>
    struct __enable_if<true, _Tp>
    {
      using __type = _Tp;
    };


  // Compile-time selection between two types: _Iftrue by default ...
  template<bool _Cond, class _Iftrue, class _Iffalse>
    struct __conditional_type
    {
      using __type = _Iftrue;
    };

  // ... and _Iffalse when the condition is false.
  template<class _Iftrue, class _Iffalse>
    struct __conditional_type<false, _Iftrue, _Iffalse>
    {
      using __type = _Iffalse;
    };


  // Maps an integer type to an unsigned type.
  // Primary template: yields _Tp itself, and only when
  // std::__is_integer<_Tp>::__value is true (otherwise __type is missing).
  template<class _Tp>
    struct __add_unsigned
    {
    private:
      using __if_type = __enable_if<std::__is_integer<_Tp>::__value, _Tp>;

    public:
      using __type = typename __if_type::__type;
    };

  // Character and signed integer types map to their unsigned counterparts.
  template<>
    struct __add_unsigned<char>
    {
      using __type = unsigned char;
    };

  template<>
    struct __add_unsigned<signed char>
    {
      using __type = unsigned char;
    };

  template<>
    struct __add_unsigned<short>
    {
      using __type = unsigned short;
    };

  template<>
    struct __add_unsigned<int>
    {
      using __type = unsigned int;
    };

  template<>
    struct __add_unsigned<long>
    {
      using __type = unsigned long;
    };

  template<>
    struct __add_unsigned<long long>
    {
      using __type = unsigned long long;
    };

  // Declared but not defined here: bool and wchar_t.
  template<>
    struct __add_unsigned<bool>;

  template<>
    struct __add_unsigned<wchar_t>;


  // Maps an integer type to a signed type.
  // Primary template: yields _Tp itself, and only when
  // std::__is_integer<_Tp>::__value is true (otherwise __type is missing).
  template<class _Tp>
    struct __remove_unsigned
    {
    private:
      using __if_type = __enable_if<std::__is_integer<_Tp>::__value, _Tp>;

    public:
      using __type = typename __if_type::__type;
    };

  // Character and unsigned integer types map to their signed counterparts.
  template<>
    struct __remove_unsigned<char>
    {
      using __type = signed char;
    };

  template<>
    struct __remove_unsigned<unsigned char>
    {
      using __type = signed char;
    };

  template<>
    struct __remove_unsigned<unsigned short>
    {
      using __type = short;
    };

  template<>
    struct __remove_unsigned<unsigned int>
    {
      using __type = int;
    };

  template<>
    struct __remove_unsigned<unsigned long>
    {
      using __type = long;
    };

  template<>
    struct __remove_unsigned<unsigned long long>
    {
      using __type = long long;
    };

  // Declared but not defined here: bool and wchar_t.
  template<>
    struct __remove_unsigned<bool>;

  template<>
    struct __remove_unsigned<wchar_t>;


  // Pointer argument: true exactly when the pointer is null.
  template<class _Type>
    constexpr inline bool
    __is_null_pointer(_Type* __ptr)
    {
      return !__ptr;
    }

  // Any other argument type: never a null pointer.
  template<class _Type>
    constexpr inline bool
    __is_null_pointer(_Type)
    {
      return false;
    }

  // The nullptr literal itself: always a null pointer.
  constexpr bool
  __is_null_pointer(std::nullptr_t)
  {
    return true;
  }


  // Promotion helper.  The defaulted bool is std::__is_integer<_Tp>::__value;
  // when it is true the promoted type is double.
  template<class _Tp, bool _IsInteger = std::__is_integer<_Tp>::__value>
    struct __promote
    {
      using __type = double;
    };

  // When the bool is false there is no __type member, unless one of the
  // explicit specializations below applies.
  template<class _Tp>
    struct __promote<_Tp, false>
    {
    };

  // The floating-point types promote to themselves.
  template<>
    struct __promote<long double>
    {
      using __type = long double;
    };

  template<>
    struct __promote<double>
    {
      using __type = double;
    };

  template<>
    struct __promote<float>
    {
      using __type = float;
    };
# 211 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/ext/type_traits.h" 3
  // Multi-argument promotion: each argument is promoted through __promote
  // and __type is the type of the sum of value-initialized promoted operands.

  // Two arguments.
  template<class _Tp, class _Up,
           class _Tp2 = typename __promote<_Tp>::__type,
           class _Up2 = typename __promote<_Up>::__type>
    struct __promote_2
    {
      using __type = __typeof__(_Tp2() + _Up2());
    };

  // Three arguments.
  template<class _Tp, class _Up, class _Vp,
           class _Tp2 = typename __promote<_Tp>::__type,
           class _Up2 = typename __promote<_Up>::__type,
           class _Vp2 = typename __promote<_Vp>::__type>
    struct __promote_3
    {
      using __type = __typeof__(_Tp2() + _Up2() + _Vp2());
    };

  // Four arguments.
  template<class _Tp, class _Up, class _Vp, class _Wp,
           class _Tp2 = typename __promote<_Tp>::__type,
           class _Up2 = typename __promote<_Up>::__type,
           class _Vp2 = typename __promote<_Vp>::__type,
           class _Wp2 = typename __promote<_Wp>::__type>
    struct __promote_4
    {
      using __type = __typeof__(_Tp2() + _Up2() + _Vp2() + _Wp2());
    };


}
}
# 44 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 2 3

# 1 "/usr/include/math.h" 1 3 4
# 27 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/libc-header-start.h" 1 3 4
# 28 "/usr/include/math.h" 2 3 4


extern "C" {


# 1 "/usr/include/x86_64-linux-gnu/bits/types.h" 1 3 4
# 27 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 1 3 4
# 28 "/usr/include/x86_64-linux-gnu/bits/types.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/timesize.h" 1 3 4
# 19 "/usr/include/x86_64-linux-gnu/bits/timesize.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 1 3 4
# 20 "/usr/include/x86_64-linux-gnu/bits/timesize.h" 2 3 4
# 29 "/usr/include/x86_64-linux-gnu/bits/types.h" 2 3 4


/* Short internal aliases for the unsigned variants of char, short, int
   and long.  */
typedef unsigned char __u_char;
typedef unsigned short int __u_short;
typedef unsigned int __u_int;
typedef unsigned long int __u_long;


/* Width-named signed/unsigned integer aliases.  The names imply 8, 16, 32
   and 64 bits; the underlying types are whatever this target's headers
   selected (the include paths indicate x86_64-linux-gnu).  */
typedef signed char __int8_t;
typedef unsigned char __uint8_t;
typedef signed short int __int16_t;
typedef unsigned short int __uint16_t;
typedef signed int __int32_t;
typedef unsigned int __uint32_t;

/* The 64-bit pair is mapped to `long' rather than `long long' here.  */
typedef signed long int __int64_t;
typedef unsigned long int __uint64_t;


/* "Least-width" aliases.  Each one is defined directly in terms of the
   width-named alias of the same size and signedness.  */
typedef __int8_t __int_least8_t;
typedef __uint8_t __uint_least8_t;
typedef __int16_t __int_least16_t;
typedef __uint16_t __uint_least16_t;
typedef __int32_t __int_least32_t;
typedef __uint32_t __uint_least32_t;
typedef __int64_t __int_least64_t;
typedef __uint64_t __uint_least64_t;


typedef long int __quad_t;
typedef unsigned long int __u_quad_t;


typedef long int __intmax_t;
typedef unsigned long int __uintmax_t;
# 141 "/usr/include/x86_64-linux-gnu/bits/types.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/typesizes.h" 1 3 4
# 142 "/usr/include/x86_64-linux-gnu/bits/types.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/time64.h" 1 3 4
# 143 "/usr/include/x86_64-linux-gnu/bits/types.h" 2 3 4


/* Double-underscore system type aliases.  These declarations only fix the
   underlying representation of each name for this target (the include
   paths indicate x86_64-linux-gnu); the role of each type is implied by
   its name and is not stated by anything in this block.

   Where a plain and a "64" variant both exist (__ino_t/__ino64_t,
   __off_t/__off64_t, __rlim_t/__rlim64_t, __blkcnt_t/__blkcnt64_t, ...)
   the two have the same underlying type in this configuration.  */
typedef unsigned long int __dev_t;
typedef unsigned int __uid_t;
typedef unsigned int __gid_t;
typedef unsigned long int __ino_t;
typedef unsigned long int __ino64_t;
typedef unsigned int __mode_t;
typedef unsigned long int __nlink_t;
typedef long int __off_t;
typedef long int __off64_t;
typedef int __pid_t;
/* The only aggregate in this group: a struct wrapping two ints.  */
typedef struct { int __val[2]; } __fsid_t;
typedef long int __clock_t;
typedef unsigned long int __rlim_t;
typedef unsigned long int __rlim64_t;
typedef unsigned int __id_t;
typedef long int __time_t;
typedef unsigned int __useconds_t;
typedef long int __suseconds_t;
typedef long int __suseconds64_t;

typedef int __daddr_t;
typedef int __key_t;


typedef int __clockid_t;


/* Represented as an untyped pointer rather than an integer.  */
typedef void * __timer_t;


typedef long int __blksize_t;


typedef long int __blkcnt_t;
typedef long int __blkcnt64_t;


typedef unsigned long int __fsblkcnt_t;
typedef unsigned long int __fsblkcnt64_t;


typedef unsigned long int __fsfilcnt_t;
typedef unsigned long int __fsfilcnt64_t;


typedef long int __fsword_t;

typedef long int __ssize_t;


typedef long int __syscall_slong_t;

typedef unsigned long int __syscall_ulong_t;


/* __loff_t is defined in terms of __off64_t, not a builtin type.  */
typedef __off64_t __loff_t;
typedef char *__caddr_t;


typedef long int __intptr_t;


typedef unsigned int __socklen_t;


typedef int __sig_atomic_t;
# 38 "/usr/include/math.h" 2 3 4


# 1 "/usr/include/x86_64-linux-gnu/bits/math-vector.h" 1 3 4
# 25 "/usr/include/x86_64-linux-gnu/bits/math-vector.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/libm-simd-decl-stubs.h" 1 3 4
# 26 "/usr/include/x86_64-linux-gnu/bits/math-vector.h" 2 3 4
# 41 "/usr/include/math.h" 2 3 4


# 1 "/usr/include/x86_64-linux-gnu/bits/floatn.h" 1 3 4
# 119 "/usr/include/x86_64-linux-gnu/bits/floatn.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/floatn-common.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/floatn-common.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/long-double.h" 1 3 4
# 25 "/usr/include/x86_64-linux-gnu/bits/floatn-common.h" 2 3 4
# 214 "/usr/include/x86_64-linux-gnu/bits/floatn-common.h" 3 4
typedef float _Float32; /* _Float32 provided as an alias of float. */
# 251 "/usr/include/x86_64-linux-gnu/bits/floatn-common.h" 3 4
typedef double _Float64; /* _Float64 provided as an alias of double. */
# 268 "/usr/include/x86_64-linux-gnu/bits/floatn-common.h" 3 4
typedef double _Float32x; /* _Float32x is also an alias of double here. */
# 285 "/usr/include/x86_64-linux-gnu/bits/floatn-common.h" 3 4
typedef long double _Float64x; /* _Float64x provided as an alias of long double. */
# 120 "/usr/include/x86_64-linux-gnu/bits/floatn.h" 2 3 4
# 44 "/usr/include/math.h" 2 3 4
# 152 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/flt-eval-method.h" 1 3 4
# 153 "/usr/include/math.h" 2 3 4
# 163 "/usr/include/math.h" 3 4
typedef float float_t; /* float_t is plain float in this configuration. */
typedef double double_t; /* double_t is plain double in this configuration. */
# 204 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/fp-logb.h" 1 3 4
# 205 "/usr/include/math.h" 2 3 4
# 247 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/fp-fast.h" 1 3 4
# 248 "/usr/include/math.h" 2 3 4


/* Anonymous enumeration of the FP_INT_* constants, numbered 0 through 4.
   NOTE(review): presumably these are the values accepted by the `__round'
   argument of the fromfp/ufromfp family declared later in this file --
   confirm against the glibc documentation.  */
enum
  {
    FP_INT_UPWARD =

      0,
    FP_INT_DOWNWARD =

      1,
    FP_INT_TOWARDZERO =

      2,
    FP_INT_TONEARESTFROMZERO =

      3,
    FP_INT_TONEAREST =

      4,
  };
# 312 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h" 1 3 4
# 20 "/usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h" 3 4
/* Internal helper prototypes for `double' arguments.  All return int and
   are declared noexcept.  NOTE(review): presumably these back the generic
   fpclassify/signbit/isinf/isfinite/isnan/iseqsig/issignaling interfaces
   -- confirm against math.h.

   Those carrying __attribute__ ((__const__)) are promised to the compiler
   to have no side effects and to depend only on their argument values.  */
extern int __fpclassify (double __value) noexcept (true)
     __attribute__ ((__const__));


extern int __signbit (double __value) noexcept (true)
     __attribute__ ((__const__));


extern int __isinf (double __value) noexcept (true)
  __attribute__ ((__const__));


extern int __finite (double __value) noexcept (true)
  __attribute__ ((__const__));


extern int __isnan (double __value) noexcept (true)
  __attribute__ ((__const__));


/* Takes two operands and, unlike its neighbours, is NOT marked __const__.  */
extern int __iseqsig (double __x, double __y) noexcept (true);


extern int __issignaling (double __value) noexcept (true)
     __attribute__ ((__const__));
# 313 "/usr/include/math.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 1 3 4
# 53 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
 extern double acos (double __x) noexcept (true); extern double __acos (double __x) noexcept (true);

 extern double asin (double __x) noexcept (true); extern double __asin (double __x) noexcept (true);

 extern double atan (double __x) noexcept (true); extern double __atan (double __x) noexcept (true);

 extern double atan2 (double __y, double __x) noexcept (true); extern double __atan2 (double __y, double __x) noexcept (true);


 extern double cos (double __x) noexcept (true); extern double __cos (double __x) noexcept (true);

 extern double sin (double __x) noexcept (true); extern double __sin (double __x) noexcept (true);

 extern double tan (double __x) noexcept (true); extern double __tan (double __x) noexcept (true);


 extern double cosh (double __x) noexcept (true); extern double __cosh (double __x) noexcept (true);

 extern double sinh (double __x) noexcept (true); extern double __sinh (double __x) noexcept (true);

 extern double tanh (double __x) noexcept (true); extern double __tanh (double __x) noexcept (true);


 extern void sincos (double __x, double *__sinx, double *__cosx) noexcept (true); extern void __sincos (double __x, double *__sinx, double *__cosx) noexcept (true);


 extern double acosh (double __x) noexcept (true); extern double __acosh (double __x) noexcept (true);

 extern double asinh (double __x) noexcept (true); extern double __asinh (double __x) noexcept (true);

 extern double atanh (double __x) noexcept (true); extern double __atanh (double __x) noexcept (true);


 extern double exp (double __x) noexcept (true); extern double __exp (double __x) noexcept (true);


extern double frexp (double __x, int *__exponent) noexcept (true); extern double __frexp (double __x, int *__exponent) noexcept (true);


extern double ldexp (double __x, int __exponent) noexcept (true); extern double __ldexp (double __x, int __exponent) noexcept (true);


 extern double log (double __x) noexcept (true); extern double __log (double __x) noexcept (true);


 extern double log10 (double __x) noexcept (true); extern double __log10 (double __x) noexcept (true);


extern double modf (double __x, double *__iptr) noexcept (true); extern double __modf (double __x, double *__iptr) noexcept (true) __attribute__ ((__nonnull__ (2)));


 extern double exp10 (double __x) noexcept (true); extern double __exp10 (double __x) noexcept (true);


 extern double expm1 (double __x) noexcept (true); extern double __expm1 (double __x) noexcept (true);


 extern double log1p (double __x) noexcept (true); extern double __log1p (double __x) noexcept (true);


extern double logb (double __x) noexcept (true); extern double __logb (double __x) noexcept (true);


 extern double exp2 (double __x) noexcept (true); extern double __exp2 (double __x) noexcept (true);


 extern double log2 (double __x) noexcept (true); extern double __log2 (double __x) noexcept (true);


 extern double pow (double __x, double __y) noexcept (true); extern double __pow (double __x, double __y) noexcept (true);


extern double sqrt (double __x) noexcept (true); extern double __sqrt (double __x) noexcept (true);


 extern double hypot (double __x, double __y) noexcept (true); extern double __hypot (double __x, double __y) noexcept (true);


 extern double cbrt (double __x) noexcept (true); extern double __cbrt (double __x) noexcept (true);


extern double ceil (double __x) noexcept (true) __attribute__ ((__const__)); extern double __ceil (double __x) noexcept (true) __attribute__ ((__const__));


extern double fabs (double __x) noexcept (true) __attribute__ ((__const__)); extern double __fabs (double __x) noexcept (true) __attribute__ ((__const__));


extern double floor (double __x) noexcept (true) __attribute__ ((__const__)); extern double __floor (double __x) noexcept (true) __attribute__ ((__const__));


extern double fmod (double __x, double __y) noexcept (true); extern double __fmod (double __x, double __y) noexcept (true);
# 183 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern int finite (double __value) noexcept (true)
  __attribute__ ((__const__));


extern double drem (double __x, double __y) noexcept (true); extern double __drem (double __x, double __y) noexcept (true);


extern double significand (double __x) noexcept (true); extern double __significand (double __x) noexcept (true);


extern double copysign (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __copysign (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double nan (const char *__tagb) noexcept (true); extern double __nan (const char *__tagb) noexcept (true);
# 220 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern double j0 (double) noexcept (true); extern double __j0 (double) noexcept (true);
extern double j1 (double) noexcept (true); extern double __j1 (double) noexcept (true);
extern double jn (int, double) noexcept (true); extern double __jn (int, double) noexcept (true);
extern double y0 (double) noexcept (true); extern double __y0 (double) noexcept (true);
extern double y1 (double) noexcept (true); extern double __y1 (double) noexcept (true);
extern double yn (int, double) noexcept (true); extern double __yn (int, double) noexcept (true);


 extern double erf (double) noexcept (true); extern double __erf (double) noexcept (true);
 extern double erfc (double) noexcept (true); extern double __erfc (double) noexcept (true);
extern double lgamma (double) noexcept (true); extern double __lgamma (double) noexcept (true);


extern double tgamma (double) noexcept (true); extern double __tgamma (double) noexcept (true);


extern double gamma (double) noexcept (true); extern double __gamma (double) noexcept (true);


extern double lgamma_r (double, int *__signgamp) noexcept (true); extern double __lgamma_r (double, int *__signgamp) noexcept (true);


extern double rint (double __x) noexcept (true); extern double __rint (double __x) noexcept (true);


extern double nextafter (double __x, double __y) noexcept (true); extern double __nextafter (double __x, double __y) noexcept (true);

extern double nexttoward (double __x, long double __y) noexcept (true); extern double __nexttoward (double __x, long double __y) noexcept (true);


extern double nextdown (double __x) noexcept (true); extern double __nextdown (double __x) noexcept (true);

extern double nextup (double __x) noexcept (true); extern double __nextup (double __x) noexcept (true);


extern double remainder (double __x, double __y) noexcept (true); extern double __remainder (double __x, double __y) noexcept (true);


extern double scalbn (double __x, int __n) noexcept (true); extern double __scalbn (double __x, int __n) noexcept (true);


extern int ilogb (double __x) noexcept (true); extern int __ilogb (double __x) noexcept (true);


extern long int llogb (double __x) noexcept (true); extern long int __llogb (double __x) noexcept (true);


extern double scalbln (double __x, long int __n) noexcept (true); extern double __scalbln (double __x, long int __n) noexcept (true);


extern double nearbyint (double __x) noexcept (true); extern double __nearbyint (double __x) noexcept (true);


extern double round (double __x) noexcept (true) __attribute__ ((__const__)); extern double __round (double __x) noexcept (true) __attribute__ ((__const__));


extern double trunc (double __x) noexcept (true) __attribute__ ((__const__)); extern double __trunc (double __x) noexcept (true) __attribute__ ((__const__));


extern double remquo (double __x, double __y, int *__quo) noexcept (true); extern double __remquo (double __x, double __y, int *__quo) noexcept (true);


extern long int lrint (double __x) noexcept (true); extern long int __lrint (double __x) noexcept (true);
__extension__
extern long long int llrint (double __x) noexcept (true); extern long long int __llrint (double __x) noexcept (true);


extern long int lround (double __x) noexcept (true); extern long int __lround (double __x) noexcept (true);
__extension__
extern long long int llround (double __x) noexcept (true); extern long long int __llround (double __x) noexcept (true);


extern double fdim (double __x, double __y) noexcept (true); extern double __fdim (double __x, double __y) noexcept (true);


extern double fmax (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fmax (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double fmin (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fmin (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double fma (double __x, double __y, double __z) noexcept (true); extern double __fma (double __x, double __y, double __z) noexcept (true);


extern double roundeven (double __x) noexcept (true) __attribute__ ((__const__)); extern double __roundeven (double __x) noexcept (true) __attribute__ ((__const__));


extern __intmax_t fromfp (double __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfp (double __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfp (double __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfp (double __x, int __round, unsigned int __width) noexcept (true);


extern __intmax_t fromfpx (double __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpx (double __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpx (double __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpx (double __x, int __round, unsigned int __width) noexcept (true);


extern int canonicalize (double *__cx, const double *__x) noexcept (true);


extern double fmaxmag (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fmaxmag (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double fminmag (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fminmag (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double fmaximum (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fmaximum (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double fminimum (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fminimum (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double fmaximum_num (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fmaximum_num (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double fminimum_num (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fminimum_num (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double fmaximum_mag (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fmaximum_mag (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double fminimum_mag (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fminimum_mag (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double fmaximum_mag_num (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fmaximum_mag_num (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern double fminimum_mag_num (double __x, double __y) noexcept (true) __attribute__ ((__const__)); extern double __fminimum_mag_num (double __x, double __y) noexcept (true) __attribute__ ((__const__));


extern int totalorder (const double *__x, const double *__y) noexcept (true)

     __attribute__ ((__pure__));


extern int totalordermag (const double *__x, const double *__y) noexcept (true)

     __attribute__ ((__pure__));


extern double getpayload (const double *__x) noexcept (true); extern double __getpayload (const double *__x) noexcept (true);


extern int setpayload (double *__x, double __payload) noexcept (true);


extern int setpayloadsig (double *__x, double __payload) noexcept (true);


extern double scalb (double __x, double __n) noexcept (true); extern double __scalb (double __x, double __n) noexcept (true);
# 314 "/usr/include/math.h" 2 3 4
# 329 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h" 1 3 4
# 20 "/usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h" 3 4
/* Internal helper prototypes for `float' arguments (names carry an `f'
   suffix).  All return int and are declared noexcept.

   Those carrying __attribute__ ((__const__)) are promised to the compiler
   to have no side effects and to depend only on their argument values.  */
extern int __fpclassifyf (float __value) noexcept (true)
     __attribute__ ((__const__));


extern int __signbitf (float __value) noexcept (true)
     __attribute__ ((__const__));


extern int __isinff (float __value) noexcept (true)
  __attribute__ ((__const__));


extern int __finitef (float __value) noexcept (true)
  __attribute__ ((__const__));


extern int __isnanf (float __value) noexcept (true)
  __attribute__ ((__const__));


/* Takes two operands and, unlike its neighbours, is NOT marked __const__.  */
extern int __iseqsigf (float __x, float __y) noexcept (true);


extern int __issignalingf (float __value) noexcept (true)
     __attribute__ ((__const__));
# 330 "/usr/include/math.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 1 3 4
# 53 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
 extern float acosf (float __x) noexcept (true); extern float __acosf (float __x) noexcept (true);

 extern float asinf (float __x) noexcept (true); extern float __asinf (float __x) noexcept (true);

 extern float atanf (float __x) noexcept (true); extern float __atanf (float __x) noexcept (true);

 extern float atan2f (float __y, float __x) noexcept (true); extern float __atan2f (float __y, float __x) noexcept (true);


 extern float cosf (float __x) noexcept (true); extern float __cosf (float __x) noexcept (true);

 extern float sinf (float __x) noexcept (true); extern float __sinf (float __x) noexcept (true);

 extern float tanf (float __x) noexcept (true); extern float __tanf (float __x) noexcept (true);


 extern float coshf (float __x) noexcept (true); extern float __coshf (float __x) noexcept (true);

 extern float sinhf (float __x) noexcept (true); extern float __sinhf (float __x) noexcept (true);

 extern float tanhf (float __x) noexcept (true); extern float __tanhf (float __x) noexcept (true);


 extern void sincosf (float __x, float *__sinx, float *__cosx) noexcept (true); extern void __sincosf (float __x, float *__sinx, float *__cosx) noexcept (true);


 extern float acoshf (float __x) noexcept (true); extern float __acoshf (float __x) noexcept (true);

 extern float asinhf (float __x) noexcept (true); extern float __asinhf (float __x) noexcept (true);

 extern float atanhf (float __x) noexcept (true); extern float __atanhf (float __x) noexcept (true);


 extern float expf (float __x) noexcept (true); extern float __expf (float __x) noexcept (true);


extern float frexpf (float __x, int *__exponent) noexcept (true); extern float __frexpf (float __x, int *__exponent) noexcept (true);


extern float ldexpf (float __x, int __exponent) noexcept (true); extern float __ldexpf (float __x, int __exponent) noexcept (true);


 extern float logf (float __x) noexcept (true); extern float __logf (float __x) noexcept (true);


 extern float log10f (float __x) noexcept (true); extern float __log10f (float __x) noexcept (true);


extern float modff (float __x, float *__iptr) noexcept (true); extern float __modff (float __x, float *__iptr) noexcept (true) __attribute__ ((__nonnull__ (2)));


 extern float exp10f (float __x) noexcept (true); extern float __exp10f (float __x) noexcept (true);


 extern float expm1f (float __x) noexcept (true); extern float __expm1f (float __x) noexcept (true);


 extern float log1pf (float __x) noexcept (true); extern float __log1pf (float __x) noexcept (true);


extern float logbf (float __x) noexcept (true); extern float __logbf (float __x) noexcept (true);


 extern float exp2f (float __x) noexcept (true); extern float __exp2f (float __x) noexcept (true);


 extern float log2f (float __x) noexcept (true); extern float __log2f (float __x) noexcept (true);


 extern float powf (float __x, float __y) noexcept (true); extern float __powf (float __x, float __y) noexcept (true);


extern float sqrtf (float __x) noexcept (true); extern float __sqrtf (float __x) noexcept (true);


 extern float hypotf (float __x, float __y) noexcept (true); extern float __hypotf (float __x, float __y) noexcept (true);


 extern float cbrtf (float __x) noexcept (true); extern float __cbrtf (float __x) noexcept (true);


extern float ceilf (float __x) noexcept (true) __attribute__ ((__const__)); extern float __ceilf (float __x) noexcept (true) __attribute__ ((__const__));


extern float fabsf (float __x) noexcept (true) __attribute__ ((__const__)); extern float __fabsf (float __x) noexcept (true) __attribute__ ((__const__));


extern float floorf (float __x) noexcept (true) __attribute__ ((__const__)); extern float __floorf (float __x) noexcept (true) __attribute__ ((__const__));


extern float fmodf (float __x, float __y) noexcept (true); extern float __fmodf (float __x, float __y) noexcept (true);
# 177 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern int isinff (float __value) noexcept (true)
  __attribute__ ((__const__));


extern int finitef (float __value) noexcept (true)
  __attribute__ ((__const__));


extern float dremf (float __x, float __y) noexcept (true); extern float __dremf (float __x, float __y) noexcept (true);


extern float significandf (float __x) noexcept (true); extern float __significandf (float __x) noexcept (true);


extern float copysignf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __copysignf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float nanf (const char *__tagb) noexcept (true); extern float __nanf (const char *__tagb) noexcept (true);
# 213 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern int isnanf (float __value) noexcept (true)
  __attribute__ ((__const__));


extern float j0f (float) noexcept (true); extern float __j0f (float) noexcept (true);
extern float j1f (float) noexcept (true); extern float __j1f (float) noexcept (true);
extern float jnf (int, float) noexcept (true); extern float __jnf (int, float) noexcept (true);
extern float y0f (float) noexcept (true); extern float __y0f (float) noexcept (true);
extern float y1f (float) noexcept (true); extern float __y1f (float) noexcept (true);
extern float ynf (int, float) noexcept (true); extern float __ynf (int, float) noexcept (true);


 extern float erff (float) noexcept (true); extern float __erff (float) noexcept (true);
 extern float erfcf (float) noexcept (true); extern float __erfcf (float) noexcept (true);
extern float lgammaf (float) noexcept (true); extern float __lgammaf (float) noexcept (true);


extern float tgammaf (float) noexcept (true); extern float __tgammaf (float) noexcept (true);


extern float gammaf (float) noexcept (true); extern float __gammaf (float) noexcept (true);


extern float lgammaf_r (float, int *__signgamp) noexcept (true); extern float __lgammaf_r (float, int *__signgamp) noexcept (true);


extern float rintf (float __x) noexcept (true); extern float __rintf (float __x) noexcept (true);


extern float nextafterf (float __x, float __y) noexcept (true); extern float __nextafterf (float __x, float __y) noexcept (true);

extern float nexttowardf (float __x, long double __y) noexcept (true); extern float __nexttowardf (float __x, long double __y) noexcept (true);


extern float nextdownf (float __x) noexcept (true); extern float __nextdownf (float __x) noexcept (true);

extern float nextupf (float __x) noexcept (true); extern float __nextupf (float __x) noexcept (true);


extern float remainderf (float __x, float __y) noexcept (true); extern float __remainderf (float __x, float __y) noexcept (true);


extern float scalbnf (float __x, int __n) noexcept (true); extern float __scalbnf (float __x, int __n) noexcept (true);


extern int ilogbf (float __x) noexcept (true); extern int __ilogbf (float __x) noexcept (true);


extern long int llogbf (float __x) noexcept (true); extern long int __llogbf (float __x) noexcept (true);


extern float scalblnf (float __x, long int __n) noexcept (true); extern float __scalblnf (float __x, long int __n) noexcept (true);


extern float nearbyintf (float __x) noexcept (true); extern float __nearbyintf (float __x) noexcept (true);


extern float roundf (float __x) noexcept (true) __attribute__ ((__const__)); extern float __roundf (float __x) noexcept (true) __attribute__ ((__const__));


extern float truncf (float __x) noexcept (true) __attribute__ ((__const__)); extern float __truncf (float __x) noexcept (true) __attribute__ ((__const__));


extern float remquof (float __x, float __y, int *__quo) noexcept (true); extern float __remquof (float __x, float __y, int *__quo) noexcept (true);


extern long int lrintf (float __x) noexcept (true); extern long int __lrintf (float __x) noexcept (true);
__extension__
extern long long int llrintf (float __x) noexcept (true); extern long long int __llrintf (float __x) noexcept (true);


extern long int lroundf (float __x) noexcept (true); extern long int __lroundf (float __x) noexcept (true);
__extension__
extern long long int llroundf (float __x) noexcept (true); extern long long int __llroundf (float __x) noexcept (true);


extern float fdimf (float __x, float __y) noexcept (true); extern float __fdimf (float __x, float __y) noexcept (true);


extern float fmaxf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fmaxf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float fminf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fminf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float fmaf (float __x, float __y, float __z) noexcept (true); extern float __fmaf (float __x, float __y, float __z) noexcept (true);


extern float roundevenf (float __x) noexcept (true) __attribute__ ((__const__)); extern float __roundevenf (float __x) noexcept (true) __attribute__ ((__const__));


extern __intmax_t fromfpf (float __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpf (float __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpf (float __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpf (float __x, int __round, unsigned int __width) noexcept (true);


extern __intmax_t fromfpxf (float __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpxf (float __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpxf (float __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpxf (float __x, int __round, unsigned int __width) noexcept (true);


extern int canonicalizef (float *__cx, const float *__x) noexcept (true);


extern float fmaxmagf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fmaxmagf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float fminmagf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fminmagf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float fmaximumf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fmaximumf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float fminimumf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fminimumf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float fmaximum_numf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fmaximum_numf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float fminimum_numf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fminimum_numf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float fmaximum_magf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fmaximum_magf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float fminimum_magf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fminimum_magf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float fmaximum_mag_numf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fmaximum_mag_numf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern float fminimum_mag_numf (float __x, float __y) noexcept (true) __attribute__ ((__const__)); extern float __fminimum_mag_numf (float __x, float __y) noexcept (true) __attribute__ ((__const__));


extern int totalorderf (const float *__x, const float *__y) noexcept (true)

     __attribute__ ((__pure__));


extern int totalordermagf (const float *__x, const float *__y) noexcept (true)

     __attribute__ ((__pure__));


extern float getpayloadf (const float *__x) noexcept (true); extern float __getpayloadf (const float *__x) noexcept (true);


extern int setpayloadf (float *__x, float __payload) noexcept (true);


extern int setpayloadsigf (float *__x, float __payload) noexcept (true);


extern float scalbf (float __x, float __n) noexcept (true); extern float __scalbf (float __x, float __n) noexcept (true);
# 331 "/usr/include/math.h" 2 3 4
# 398 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h" 1 3 4
# 20 "/usr/include/x86_64-linux-gnu/bits/mathcalls-helper-functions.h" 3 4
/* Internal helper prototypes for `long double' arguments (names carry an
   `l' suffix).  All return int and are declared noexcept.

   Those carrying __attribute__ ((__const__)) are promised to the compiler
   to have no side effects and to depend only on their argument values.  */
extern int __fpclassifyl (long double __value) noexcept (true)
     __attribute__ ((__const__));


extern int __signbitl (long double __value) noexcept (true)
     __attribute__ ((__const__));


extern int __isinfl (long double __value) noexcept (true)
  __attribute__ ((__const__));


extern int __finitel (long double __value) noexcept (true)
  __attribute__ ((__const__));


extern int __isnanl (long double __value) noexcept (true)
  __attribute__ ((__const__));


/* Takes two operands and, unlike its neighbours, is NOT marked __const__.  */
extern int __iseqsigl (long double __x, long double __y) noexcept (true);


extern int __issignalingl (long double __value) noexcept (true)
     __attribute__ ((__const__));
# 399 "/usr/include/math.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 1 3 4
# 53 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
 // long double declarations. Each line declares the public name and then the
 // same signature again under a double-underscore name. All are noexcept(true)
 // and none in this group carries a __const__/__pure__ attribute.
 // Unless noted, each takes one long double and returns long double.
 extern long double acosl (long double __x) noexcept (true); extern long double __acosl (long double __x) noexcept (true);

 extern long double asinl (long double __x) noexcept (true); extern long double __asinl (long double __x) noexcept (true);

 extern long double atanl (long double __x) noexcept (true); extern long double __atanl (long double __x) noexcept (true);

 // Two operands; note the parameter order is (__y, __x).
 extern long double atan2l (long double __y, long double __x) noexcept (true); extern long double __atan2l (long double __y, long double __x) noexcept (true);


 extern long double cosl (long double __x) noexcept (true); extern long double __cosl (long double __x) noexcept (true);

 extern long double sinl (long double __x) noexcept (true); extern long double __sinl (long double __x) noexcept (true);

 extern long double tanl (long double __x) noexcept (true); extern long double __tanl (long double __x) noexcept (true);


 extern long double coshl (long double __x) noexcept (true); extern long double __coshl (long double __x) noexcept (true);

 extern long double sinhl (long double __x) noexcept (true); extern long double __sinhl (long double __x) noexcept (true);

 extern long double tanhl (long double __x) noexcept (true); extern long double __tanhl (long double __x) noexcept (true);


 // Returns void; takes one long double by value and two non-const long double*.
 extern void sincosl (long double __x, long double *__sinx, long double *__cosx) noexcept (true); extern void __sincosl (long double __x, long double *__sinx, long double *__cosx) noexcept (true);


 extern long double acoshl (long double __x) noexcept (true); extern long double __acoshl (long double __x) noexcept (true);

 extern long double asinhl (long double __x) noexcept (true); extern long double __asinhl (long double __x) noexcept (true);

 extern long double atanhl (long double __x) noexcept (true); extern long double __atanhl (long double __x) noexcept (true);


 extern long double expl (long double __x) noexcept (true); extern long double __expl (long double __x) noexcept (true);


// long double declarations, continued. Each line declares the public name and
// then the same signature under a double-underscore name; all noexcept(true).
// Takes a long double and a non-const int*; returns long double.
extern long double frexpl (long double __x, int *__exponent) noexcept (true); extern long double __frexpl (long double __x, int *__exponent) noexcept (true);


// Takes a long double and an int by value; returns long double.
extern long double ldexpl (long double __x, int __exponent) noexcept (true); extern long double __ldexpl (long double __x, int __exponent) noexcept (true);


 extern long double logl (long double __x) noexcept (true); extern long double __logl (long double __x) noexcept (true);


 extern long double log10l (long double __x) noexcept (true); extern long double __log10l (long double __x) noexcept (true);


// NOTE: the __nonnull__ (2) attribute is written only on the second
// declaration (__modfl); the modfl declaration on this line has no attribute.
extern long double modfl (long double __x, long double *__iptr) noexcept (true); extern long double __modfl (long double __x, long double *__iptr) noexcept (true) __attribute__ ((__nonnull__ (2)));


 extern long double exp10l (long double __x) noexcept (true); extern long double __exp10l (long double __x) noexcept (true);


 extern long double expm1l (long double __x) noexcept (true); extern long double __expm1l (long double __x) noexcept (true);


 extern long double log1pl (long double __x) noexcept (true); extern long double __log1pl (long double __x) noexcept (true);


extern long double logbl (long double __x) noexcept (true); extern long double __logbl (long double __x) noexcept (true);


 extern long double exp2l (long double __x) noexcept (true); extern long double __exp2l (long double __x) noexcept (true);


 extern long double log2l (long double __x) noexcept (true); extern long double __log2l (long double __x) noexcept (true);


 // Two long double operands.
 extern long double powl (long double __x, long double __y) noexcept (true); extern long double __powl (long double __x, long double __y) noexcept (true);


extern long double sqrtl (long double __x) noexcept (true); extern long double __sqrtl (long double __x) noexcept (true);


 // Two long double operands.
 extern long double hypotl (long double __x, long double __y) noexcept (true); extern long double __hypotl (long double __x, long double __y) noexcept (true);


 extern long double cbrtl (long double __x) noexcept (true); extern long double __cbrtl (long double __x) noexcept (true);


// The next three are marked __const__ on both declarations.
extern long double ceill (long double __x) noexcept (true) __attribute__ ((__const__)); extern long double __ceill (long double __x) noexcept (true) __attribute__ ((__const__));


extern long double fabsl (long double __x) noexcept (true) __attribute__ ((__const__)); extern long double __fabsl (long double __x) noexcept (true) __attribute__ ((__const__));


extern long double floorl (long double __x) noexcept (true) __attribute__ ((__const__)); extern long double __floorl (long double __x) noexcept (true) __attribute__ ((__const__));


// Two long double operands; no attribute.
extern long double fmodl (long double __x, long double __y) noexcept (true); extern long double __fmodl (long double __x, long double __y) noexcept (true);
# 177 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
// Un-prefixed int-returning predicate on a long double; marked __const__.
// Declared once only on this line (the __isinfl form is declared separately
// earlier in this file).
extern int isinfl (long double __value) noexcept (true)
  __attribute__ ((__const__));


// Same shape as isinfl: int result, one long double operand, __const__.
extern int finitel (long double __value) noexcept (true)
  __attribute__ ((__const__));


// Two long double operands, long double result; no attribute.
extern long double dreml (long double __x, long double __y) noexcept (true); extern long double __dreml (long double __x, long double __y) noexcept (true);


// One long double operand, long double result; no attribute.
extern long double significandl (long double __x) noexcept (true); extern long double __significandl (long double __x) noexcept (true);


// Two long double operands; marked __const__ on both declarations.
extern long double copysignl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __copysignl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


// Takes a const char* and returns long double.
extern long double nanl (const char *__tagb) noexcept (true); extern long double __nanl (const char *__tagb) noexcept (true);
# 213 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
// Un-prefixed int-returning predicate on a long double; marked __const__.
extern int isnanl (long double __value) noexcept (true)
  __attribute__ ((__const__));


// The six declarations below leave their parameters unnamed. j0l/j1l/y0l/y1l
// take one long double; jnl/ynl take (int, long double). All return long
// double, are noexcept(true), and are repeated under a double-underscore name.
extern long double j0l (long double) noexcept (true); extern long double __j0l (long double) noexcept (true);
extern long double j1l (long double) noexcept (true); extern long double __j1l (long double) noexcept (true);
extern long double jnl (int, long double) noexcept (true); extern long double __jnl (int, long double) noexcept (true);
extern long double y0l (long double) noexcept (true); extern long double __y0l (long double) noexcept (true);
extern long double y1l (long double) noexcept (true); extern long double __y1l (long double) noexcept (true);
extern long double ynl (int, long double) noexcept (true); extern long double __ynl (int, long double) noexcept (true);


 // long double -> long double declarations with an unnamed parameter; each is
 // noexcept(true) and repeated under a double-underscore name.
 extern long double erfl (long double) noexcept (true); extern long double __erfl (long double) noexcept (true);
 extern long double erfcl (long double) noexcept (true); extern long double __erfcl (long double) noexcept (true);
extern long double lgammal (long double) noexcept (true); extern long double __lgammal (long double) noexcept (true);


extern long double tgammal (long double) noexcept (true); extern long double __tgammal (long double) noexcept (true);


extern long double gammal (long double) noexcept (true); extern long double __gammal (long double) noexcept (true);


// Like lgammal but with an additional non-const int* parameter (__signgamp).
extern long double lgammal_r (long double, int *__signgamp) noexcept (true); extern long double __lgammal_r (long double, int *__signgamp) noexcept (true);


// long double declarations, continued; all noexcept(true). Unless noted each
// line declares the public name and then a double-underscore twin, with no
// __const__/__pure__ attribute.
extern long double rintl (long double __x) noexcept (true); extern long double __rintl (long double __x) noexcept (true);


// Two long double operands.
extern long double nextafterl (long double __x, long double __y) noexcept (true); extern long double __nextafterl (long double __x, long double __y) noexcept (true);

// Two long double operands (same signature as nextafterl in this variant).
extern long double nexttowardl (long double __x, long double __y) noexcept (true); extern long double __nexttowardl (long double __x, long double __y) noexcept (true);


extern long double nextdownl (long double __x) noexcept (true); extern long double __nextdownl (long double __x) noexcept (true);

extern long double nextupl (long double __x) noexcept (true); extern long double __nextupl (long double __x) noexcept (true);


// Two long double operands.
extern long double remainderl (long double __x, long double __y) noexcept (true); extern long double __remainderl (long double __x, long double __y) noexcept (true);


// Second operand is an int.
extern long double scalbnl (long double __x, int __n) noexcept (true); extern long double __scalbnl (long double __x, int __n) noexcept (true);


// Returns int.
extern int ilogbl (long double __x) noexcept (true); extern int __ilogbl (long double __x) noexcept (true);


// Returns long int.
extern long int llogbl (long double __x) noexcept (true); extern long int __llogbl (long double __x) noexcept (true);


// Second operand is a long int.
extern long double scalblnl (long double __x, long int __n) noexcept (true); extern long double __scalblnl (long double __x, long int __n) noexcept (true);


extern long double nearbyintl (long double __x) noexcept (true); extern long double __nearbyintl (long double __x) noexcept (true);


// Marked __const__ on both declarations.
extern long double roundl (long double __x) noexcept (true) __attribute__ ((__const__)); extern long double __roundl (long double __x) noexcept (true) __attribute__ ((__const__));


// Marked __const__ on both declarations.
extern long double truncl (long double __x) noexcept (true) __attribute__ ((__const__)); extern long double __truncl (long double __x) noexcept (true) __attribute__ ((__const__));


// Two long double operands plus a non-const int*.
extern long double remquol (long double __x, long double __y, int *__quo) noexcept (true); extern long double __remquol (long double __x, long double __y, int *__quo) noexcept (true);


// Returns long int.
extern long int lrintl (long double __x) noexcept (true); extern long int __lrintl (long double __x) noexcept (true);
// Returns long long int; the __extension__ keyword sits on its own line
// directly ahead of the first declaration.
__extension__
extern long long int llrintl (long double __x) noexcept (true); extern long long int __llrintl (long double __x) noexcept (true);


// Returns long int.
extern long int lroundl (long double __x) noexcept (true); extern long int __lroundl (long double __x) noexcept (true);
// Returns long long int; preceded by __extension__ like llrintl.
__extension__
extern long long int llroundl (long double __x) noexcept (true); extern long long int __llroundl (long double __x) noexcept (true);


// Two long double operands; no attribute.
extern long double fdiml (long double __x, long double __y) noexcept (true); extern long double __fdiml (long double __x, long double __y) noexcept (true);


// Two long double operands; marked __const__.
extern long double fmaxl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fmaxl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


// Two long double operands; marked __const__.
extern long double fminl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fminl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


// Three long double operands; no attribute.
extern long double fmal (long double __x, long double __y, long double __z) noexcept (true); extern long double __fmal (long double __x, long double __y, long double __z) noexcept (true);


// One operand; marked __const__.
extern long double roundevenl (long double __x) noexcept (true) __attribute__ ((__const__)); extern long double __roundevenl (long double __x) noexcept (true) __attribute__ ((__const__));


// The four declarations below share the parameter list
// (long double __x, int __round, unsigned int __width); the fromfp* forms
// return __intmax_t and the ufromfp* forms return __uintmax_t.
extern __intmax_t fromfpl (long double __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpl (long double __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpl (long double __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpl (long double __x, int __round, unsigned int __width) noexcept (true);


extern __intmax_t fromfpxl (long double __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpxl (long double __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpxl (long double __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpxl (long double __x, int __round, unsigned int __width) noexcept (true);


// Returns int; takes a non-const long double* and a pointer-to-const long
// double. Declared once only (no double-underscore twin).
extern int canonicalizel (long double *__cx, const long double *__x) noexcept (true);


// Ten two-operand long double declarations follow (fmaxmagl through
// fminimum_mag_numl). Each takes (long double, long double), returns long
// double, is noexcept(true), and is marked __const__ on both the public and
// the double-underscore declaration.
extern long double fmaxmagl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fmaxmagl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


extern long double fminmagl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fminmagl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


extern long double fmaximuml (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fmaximuml (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


extern long double fminimuml (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fminimuml (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


extern long double fmaximum_numl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fmaximum_numl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


extern long double fminimum_numl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fminimum_numl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


extern long double fmaximum_magl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fmaximum_magl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


extern long double fminimum_magl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fminimum_magl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


extern long double fmaximum_mag_numl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fmaximum_mag_numl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


extern long double fminimum_mag_numl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__)); extern long double __fminimum_mag_numl (long double __x, long double __y) noexcept (true) __attribute__ ((__const__));


// Both operands are pointer-to-const long double; returns int. Marked
// __pure__ rather than __const__. Declared once only.
extern int totalorderl (const long double *__x, const long double *__y) noexcept (true)

     __attribute__ ((__pure__));


// Same signature and attribute as totalorderl.
extern int totalordermagl (const long double *__x, const long double *__y) noexcept (true)

     __attribute__ ((__pure__));


// Takes a pointer-to-const long double and returns long double; no attribute.
extern long double getpayloadl (const long double *__x) noexcept (true); extern long double __getpayloadl (const long double *__x) noexcept (true);


// Takes a non-const long double* plus a long double payload; returns int.
// Declared once only.
extern int setpayloadl (long double *__x, long double __payload) noexcept (true);


// Same signature as setpayloadl. Declared once only.
extern int setpayloadsigl (long double *__x, long double __payload) noexcept (true);


// Two long double operands (the second is a long double, not an int).
extern long double scalbl (long double __x, long double __n) noexcept (true); extern long double __scalbl (long double __x, long double __n) noexcept (true);
# 400 "/usr/include/math.h" 2 3 4
# 450 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 1 3 4
# 53 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
 // _Float32 declarations (names end in f32). Each line declares the public
 // name and then the same signature under a double-underscore name. All are
 // noexcept(true); none in this group carries a __const__/__pure__ attribute.
 // Unless noted, each takes one _Float32 and returns _Float32.
 extern _Float32 acosf32 (_Float32 __x) noexcept (true); extern _Float32 __acosf32 (_Float32 __x) noexcept (true);

 extern _Float32 asinf32 (_Float32 __x) noexcept (true); extern _Float32 __asinf32 (_Float32 __x) noexcept (true);

 extern _Float32 atanf32 (_Float32 __x) noexcept (true); extern _Float32 __atanf32 (_Float32 __x) noexcept (true);

 // Two operands; note the parameter order is (__y, __x).
 extern _Float32 atan2f32 (_Float32 __y, _Float32 __x) noexcept (true); extern _Float32 __atan2f32 (_Float32 __y, _Float32 __x) noexcept (true);


 extern _Float32 cosf32 (_Float32 __x) noexcept (true); extern _Float32 __cosf32 (_Float32 __x) noexcept (true);

 extern _Float32 sinf32 (_Float32 __x) noexcept (true); extern _Float32 __sinf32 (_Float32 __x) noexcept (true);

 extern _Float32 tanf32 (_Float32 __x) noexcept (true); extern _Float32 __tanf32 (_Float32 __x) noexcept (true);


 extern _Float32 coshf32 (_Float32 __x) noexcept (true); extern _Float32 __coshf32 (_Float32 __x) noexcept (true);

 extern _Float32 sinhf32 (_Float32 __x) noexcept (true); extern _Float32 __sinhf32 (_Float32 __x) noexcept (true);

 extern _Float32 tanhf32 (_Float32 __x) noexcept (true); extern _Float32 __tanhf32 (_Float32 __x) noexcept (true);


 // Returns void; takes one _Float32 by value and two non-const _Float32*.
 extern void sincosf32 (_Float32 __x, _Float32 *__sinx, _Float32 *__cosx) noexcept (true); extern void __sincosf32 (_Float32 __x, _Float32 *__sinx, _Float32 *__cosx) noexcept (true);


 extern _Float32 acoshf32 (_Float32 __x) noexcept (true); extern _Float32 __acoshf32 (_Float32 __x) noexcept (true);

 extern _Float32 asinhf32 (_Float32 __x) noexcept (true); extern _Float32 __asinhf32 (_Float32 __x) noexcept (true);

 extern _Float32 atanhf32 (_Float32 __x) noexcept (true); extern _Float32 __atanhf32 (_Float32 __x) noexcept (true);


 extern _Float32 expf32 (_Float32 __x) noexcept (true); extern _Float32 __expf32 (_Float32 __x) noexcept (true);


// _Float32 declarations, continued. Each line declares the public name and
// then the same signature under a double-underscore name; all noexcept(true).
// Takes a _Float32 and a non-const int*; returns _Float32.
extern _Float32 frexpf32 (_Float32 __x, int *__exponent) noexcept (true); extern _Float32 __frexpf32 (_Float32 __x, int *__exponent) noexcept (true);


// Takes a _Float32 and an int by value; returns _Float32.
extern _Float32 ldexpf32 (_Float32 __x, int __exponent) noexcept (true); extern _Float32 __ldexpf32 (_Float32 __x, int __exponent) noexcept (true);


 extern _Float32 logf32 (_Float32 __x) noexcept (true); extern _Float32 __logf32 (_Float32 __x) noexcept (true);


 extern _Float32 log10f32 (_Float32 __x) noexcept (true); extern _Float32 __log10f32 (_Float32 __x) noexcept (true);


// NOTE: the __nonnull__ (2) attribute is written only on the second
// declaration (__modff32); the modff32 declaration on this line has none.
extern _Float32 modff32 (_Float32 __x, _Float32 *__iptr) noexcept (true); extern _Float32 __modff32 (_Float32 __x, _Float32 *__iptr) noexcept (true) __attribute__ ((__nonnull__ (2)));


 extern _Float32 exp10f32 (_Float32 __x) noexcept (true); extern _Float32 __exp10f32 (_Float32 __x) noexcept (true);


 extern _Float32 expm1f32 (_Float32 __x) noexcept (true); extern _Float32 __expm1f32 (_Float32 __x) noexcept (true);


 extern _Float32 log1pf32 (_Float32 __x) noexcept (true); extern _Float32 __log1pf32 (_Float32 __x) noexcept (true);


extern _Float32 logbf32 (_Float32 __x) noexcept (true); extern _Float32 __logbf32 (_Float32 __x) noexcept (true);


 extern _Float32 exp2f32 (_Float32 __x) noexcept (true); extern _Float32 __exp2f32 (_Float32 __x) noexcept (true);


 extern _Float32 log2f32 (_Float32 __x) noexcept (true); extern _Float32 __log2f32 (_Float32 __x) noexcept (true);


 // Two _Float32 operands.
 extern _Float32 powf32 (_Float32 __x, _Float32 __y) noexcept (true); extern _Float32 __powf32 (_Float32 __x, _Float32 __y) noexcept (true);


extern _Float32 sqrtf32 (_Float32 __x) noexcept (true); extern _Float32 __sqrtf32 (_Float32 __x) noexcept (true);


 // Two _Float32 operands.
 extern _Float32 hypotf32 (_Float32 __x, _Float32 __y) noexcept (true); extern _Float32 __hypotf32 (_Float32 __x, _Float32 __y) noexcept (true);


 extern _Float32 cbrtf32 (_Float32 __x) noexcept (true); extern _Float32 __cbrtf32 (_Float32 __x) noexcept (true);


// The next three are marked __const__ on both declarations.
extern _Float32 ceilf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__)); extern _Float32 __ceilf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__));


extern _Float32 fabsf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fabsf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__));


extern _Float32 floorf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__)); extern _Float32 __floorf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__));


// Two _Float32 operands; no attribute.
extern _Float32 fmodf32 (_Float32 __x, _Float32 __y) noexcept (true); extern _Float32 __fmodf32 (_Float32 __x, _Float32 __y) noexcept (true);
# 198 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
// Two _Float32 operands, _Float32 result; marked __const__ on both the public
// and the double-underscore declaration.
extern _Float32 copysignf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __copysignf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


// Takes a const char* and returns _Float32; no attribute.
extern _Float32 nanf32 (const char *__tagb) noexcept (true); extern _Float32 __nanf32 (const char *__tagb) noexcept (true);
# 220 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
// _Float32 declarations with unnamed parameters. j0/j1/y0/y1 take one
// _Float32; jn/yn take (int, _Float32). All return _Float32, are
// noexcept(true), and are repeated under a double-underscore name.
extern _Float32 j0f32 (_Float32) noexcept (true); extern _Float32 __j0f32 (_Float32) noexcept (true);
extern _Float32 j1f32 (_Float32) noexcept (true); extern _Float32 __j1f32 (_Float32) noexcept (true);
extern _Float32 jnf32 (int, _Float32) noexcept (true); extern _Float32 __jnf32 (int, _Float32) noexcept (true);
extern _Float32 y0f32 (_Float32) noexcept (true); extern _Float32 __y0f32 (_Float32) noexcept (true);
extern _Float32 y1f32 (_Float32) noexcept (true); extern _Float32 __y1f32 (_Float32) noexcept (true);
extern _Float32 ynf32 (int, _Float32) noexcept (true); extern _Float32 __ynf32 (int, _Float32) noexcept (true);


 // One unnamed _Float32 parameter, _Float32 result, for each of the following.
 extern _Float32 erff32 (_Float32) noexcept (true); extern _Float32 __erff32 (_Float32) noexcept (true);
 extern _Float32 erfcf32 (_Float32) noexcept (true); extern _Float32 __erfcf32 (_Float32) noexcept (true);
extern _Float32 lgammaf32 (_Float32) noexcept (true); extern _Float32 __lgammaf32 (_Float32) noexcept (true);


extern _Float32 tgammaf32 (_Float32) noexcept (true); extern _Float32 __tgammaf32 (_Float32) noexcept (true);
# 252 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
// _Float32 declarations, continued; all noexcept(true). Unless noted each
// line declares the public name and then a double-underscore twin, with no
// __const__/__pure__ attribute.
// Unnamed _Float32 parameter plus a non-const int* (__signgamp).
extern _Float32 lgammaf32_r (_Float32, int *__signgamp) noexcept (true); extern _Float32 __lgammaf32_r (_Float32, int *__signgamp) noexcept (true);


extern _Float32 rintf32 (_Float32 __x) noexcept (true); extern _Float32 __rintf32 (_Float32 __x) noexcept (true);


// Two _Float32 operands.
extern _Float32 nextafterf32 (_Float32 __x, _Float32 __y) noexcept (true); extern _Float32 __nextafterf32 (_Float32 __x, _Float32 __y) noexcept (true);


extern _Float32 nextdownf32 (_Float32 __x) noexcept (true); extern _Float32 __nextdownf32 (_Float32 __x) noexcept (true);

extern _Float32 nextupf32 (_Float32 __x) noexcept (true); extern _Float32 __nextupf32 (_Float32 __x) noexcept (true);


// Two _Float32 operands.
extern _Float32 remainderf32 (_Float32 __x, _Float32 __y) noexcept (true); extern _Float32 __remainderf32 (_Float32 __x, _Float32 __y) noexcept (true);


// Second operand is an int.
extern _Float32 scalbnf32 (_Float32 __x, int __n) noexcept (true); extern _Float32 __scalbnf32 (_Float32 __x, int __n) noexcept (true);


// Returns int.
extern int ilogbf32 (_Float32 __x) noexcept (true); extern int __ilogbf32 (_Float32 __x) noexcept (true);


// Returns long int.
extern long int llogbf32 (_Float32 __x) noexcept (true); extern long int __llogbf32 (_Float32 __x) noexcept (true);


// Second operand is a long int.
extern _Float32 scalblnf32 (_Float32 __x, long int __n) noexcept (true); extern _Float32 __scalblnf32 (_Float32 __x, long int __n) noexcept (true);


extern _Float32 nearbyintf32 (_Float32 __x) noexcept (true); extern _Float32 __nearbyintf32 (_Float32 __x) noexcept (true);


// Marked __const__ on both declarations.
extern _Float32 roundf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__)); extern _Float32 __roundf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__));


// Marked __const__ on both declarations.
extern _Float32 truncf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__)); extern _Float32 __truncf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__));


// Two _Float32 operands plus a non-const int*.
extern _Float32 remquof32 (_Float32 __x, _Float32 __y, int *__quo) noexcept (true); extern _Float32 __remquof32 (_Float32 __x, _Float32 __y, int *__quo) noexcept (true);


// Returns long int.
extern long int lrintf32 (_Float32 __x) noexcept (true); extern long int __lrintf32 (_Float32 __x) noexcept (true);
// Returns long long int; the __extension__ keyword sits on its own line
// directly ahead of the first declaration.
__extension__
extern long long int llrintf32 (_Float32 __x) noexcept (true); extern long long int __llrintf32 (_Float32 __x) noexcept (true);


// Returns long int.
extern long int lroundf32 (_Float32 __x) noexcept (true); extern long int __lroundf32 (_Float32 __x) noexcept (true);
// Returns long long int; preceded by __extension__ like llrintf32.
__extension__
extern long long int llroundf32 (_Float32 __x) noexcept (true); extern long long int __llroundf32 (_Float32 __x) noexcept (true);


// Two _Float32 operands; no attribute.
extern _Float32 fdimf32 (_Float32 __x, _Float32 __y) noexcept (true); extern _Float32 __fdimf32 (_Float32 __x, _Float32 __y) noexcept (true);


// Two _Float32 operands; marked __const__.
extern _Float32 fmaxf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fmaxf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


// Two _Float32 operands; marked __const__.
extern _Float32 fminf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fminf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


// Three _Float32 operands; no attribute.
extern _Float32 fmaf32 (_Float32 __x, _Float32 __y, _Float32 __z) noexcept (true); extern _Float32 __fmaf32 (_Float32 __x, _Float32 __y, _Float32 __z) noexcept (true);


// One operand; marked __const__.
extern _Float32 roundevenf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__)); extern _Float32 __roundevenf32 (_Float32 __x) noexcept (true) __attribute__ ((__const__));


// The four declarations below share the parameter list
// (_Float32 __x, int __round, unsigned int __width); the fromfp* forms return
// __intmax_t and the ufromfp* forms return __uintmax_t.
extern __intmax_t fromfpf32 (_Float32 __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpf32 (_Float32 __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpf32 (_Float32 __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpf32 (_Float32 __x, int __round, unsigned int __width) noexcept (true);


extern __intmax_t fromfpxf32 (_Float32 __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpxf32 (_Float32 __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpxf32 (_Float32 __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpxf32 (_Float32 __x, int __round, unsigned int __width) noexcept (true);


// Returns int; takes a non-const _Float32* and a pointer-to-const _Float32.
// Declared once only (no double-underscore twin).
extern int canonicalizef32 (_Float32 *__cx, const _Float32 *__x) noexcept (true);


// Ten two-operand _Float32 declarations follow (fmaxmagf32 through
// fminimum_mag_numf32). Each takes (_Float32, _Float32), returns _Float32, is
// noexcept(true), and is marked __const__ on both the public and the
// double-underscore declaration.
extern _Float32 fmaxmagf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fmaxmagf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


extern _Float32 fminmagf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fminmagf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


extern _Float32 fmaximumf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fmaximumf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


extern _Float32 fminimumf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fminimumf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


extern _Float32 fmaximum_numf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fmaximum_numf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


extern _Float32 fminimum_numf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fminimum_numf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


extern _Float32 fmaximum_magf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fmaximum_magf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


extern _Float32 fminimum_magf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fminimum_magf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


extern _Float32 fmaximum_mag_numf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fmaximum_mag_numf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


extern _Float32 fminimum_mag_numf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__)); extern _Float32 __fminimum_mag_numf32 (_Float32 __x, _Float32 __y) noexcept (true) __attribute__ ((__const__));


// Both operands are pointer-to-const _Float32; returns int. Marked __pure__
// rather than __const__. Declared once only.
extern int totalorderf32 (const _Float32 *__x, const _Float32 *__y) noexcept (true)

     __attribute__ ((__pure__));


// Same signature and attribute as totalorderf32.
extern int totalordermagf32 (const _Float32 *__x, const _Float32 *__y) noexcept (true)

     __attribute__ ((__pure__));


// Takes a pointer-to-const _Float32 and returns _Float32; no attribute.
extern _Float32 getpayloadf32 (const _Float32 *__x) noexcept (true); extern _Float32 __getpayloadf32 (const _Float32 *__x) noexcept (true);


// Takes a non-const _Float32* plus a _Float32 payload; returns int.
// Declared once only.
extern int setpayloadf32 (_Float32 *__x, _Float32 __payload) noexcept (true);


// Same signature as setpayloadf32. Declared once only.
extern int setpayloadsigf32 (_Float32 *__x, _Float32 __payload) noexcept (true);
# 451 "/usr/include/math.h" 2 3 4
# 467 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 1 3 4
# 53 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
 // _Float64 declarations (names end in f64). Each line declares the public
 // name and then the same signature under a double-underscore name. All are
 // noexcept(true); none in this group carries a __const__/__pure__ attribute.
 // Unless noted, each takes one _Float64 and returns _Float64.
 extern _Float64 acosf64 (_Float64 __x) noexcept (true); extern _Float64 __acosf64 (_Float64 __x) noexcept (true);

 extern _Float64 asinf64 (_Float64 __x) noexcept (true); extern _Float64 __asinf64 (_Float64 __x) noexcept (true);

 extern _Float64 atanf64 (_Float64 __x) noexcept (true); extern _Float64 __atanf64 (_Float64 __x) noexcept (true);

 // Two operands; note the parameter order is (__y, __x).
 extern _Float64 atan2f64 (_Float64 __y, _Float64 __x) noexcept (true); extern _Float64 __atan2f64 (_Float64 __y, _Float64 __x) noexcept (true);


 extern _Float64 cosf64 (_Float64 __x) noexcept (true); extern _Float64 __cosf64 (_Float64 __x) noexcept (true);

 extern _Float64 sinf64 (_Float64 __x) noexcept (true); extern _Float64 __sinf64 (_Float64 __x) noexcept (true);

 extern _Float64 tanf64 (_Float64 __x) noexcept (true); extern _Float64 __tanf64 (_Float64 __x) noexcept (true);


 extern _Float64 coshf64 (_Float64 __x) noexcept (true); extern _Float64 __coshf64 (_Float64 __x) noexcept (true);

 extern _Float64 sinhf64 (_Float64 __x) noexcept (true); extern _Float64 __sinhf64 (_Float64 __x) noexcept (true);

 extern _Float64 tanhf64 (_Float64 __x) noexcept (true); extern _Float64 __tanhf64 (_Float64 __x) noexcept (true);


 // Returns void; takes one _Float64 by value and two non-const _Float64*.
 extern void sincosf64 (_Float64 __x, _Float64 *__sinx, _Float64 *__cosx) noexcept (true); extern void __sincosf64 (_Float64 __x, _Float64 *__sinx, _Float64 *__cosx) noexcept (true);


 extern _Float64 acoshf64 (_Float64 __x) noexcept (true); extern _Float64 __acoshf64 (_Float64 __x) noexcept (true);

 extern _Float64 asinhf64 (_Float64 __x) noexcept (true); extern _Float64 __asinhf64 (_Float64 __x) noexcept (true);

 extern _Float64 atanhf64 (_Float64 __x) noexcept (true); extern _Float64 __atanhf64 (_Float64 __x) noexcept (true);


 extern _Float64 expf64 (_Float64 __x) noexcept (true); extern _Float64 __expf64 (_Float64 __x) noexcept (true);


// _Float64 declarations, continued. Each line declares the public name and
// then the same signature under a double-underscore name; all noexcept(true).
// Takes a _Float64 and a non-const int*; returns _Float64.
extern _Float64 frexpf64 (_Float64 __x, int *__exponent) noexcept (true); extern _Float64 __frexpf64 (_Float64 __x, int *__exponent) noexcept (true);


// Takes a _Float64 and an int by value; returns _Float64.
extern _Float64 ldexpf64 (_Float64 __x, int __exponent) noexcept (true); extern _Float64 __ldexpf64 (_Float64 __x, int __exponent) noexcept (true);


 extern _Float64 logf64 (_Float64 __x) noexcept (true); extern _Float64 __logf64 (_Float64 __x) noexcept (true);


 extern _Float64 log10f64 (_Float64 __x) noexcept (true); extern _Float64 __log10f64 (_Float64 __x) noexcept (true);


// NOTE: the __nonnull__ (2) attribute is written only on the second
// declaration (__modff64); the modff64 declaration on this line has none.
extern _Float64 modff64 (_Float64 __x, _Float64 *__iptr) noexcept (true); extern _Float64 __modff64 (_Float64 __x, _Float64 *__iptr) noexcept (true) __attribute__ ((__nonnull__ (2)));


 extern _Float64 exp10f64 (_Float64 __x) noexcept (true); extern _Float64 __exp10f64 (_Float64 __x) noexcept (true);


 extern _Float64 expm1f64 (_Float64 __x) noexcept (true); extern _Float64 __expm1f64 (_Float64 __x) noexcept (true);


 extern _Float64 log1pf64 (_Float64 __x) noexcept (true); extern _Float64 __log1pf64 (_Float64 __x) noexcept (true);


extern _Float64 logbf64 (_Float64 __x) noexcept (true); extern _Float64 __logbf64 (_Float64 __x) noexcept (true);


 extern _Float64 exp2f64 (_Float64 __x) noexcept (true); extern _Float64 __exp2f64 (_Float64 __x) noexcept (true);


 extern _Float64 log2f64 (_Float64 __x) noexcept (true); extern _Float64 __log2f64 (_Float64 __x) noexcept (true);


 // Two _Float64 operands.
 extern _Float64 powf64 (_Float64 __x, _Float64 __y) noexcept (true); extern _Float64 __powf64 (_Float64 __x, _Float64 __y) noexcept (true);


extern _Float64 sqrtf64 (_Float64 __x) noexcept (true); extern _Float64 __sqrtf64 (_Float64 __x) noexcept (true);


 // Two _Float64 operands.
 extern _Float64 hypotf64 (_Float64 __x, _Float64 __y) noexcept (true); extern _Float64 __hypotf64 (_Float64 __x, _Float64 __y) noexcept (true);


 extern _Float64 cbrtf64 (_Float64 __x) noexcept (true); extern _Float64 __cbrtf64 (_Float64 __x) noexcept (true);


// The next three are marked __const__ on both declarations.
extern _Float64 ceilf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__)); extern _Float64 __ceilf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__));


extern _Float64 fabsf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fabsf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__));


extern _Float64 floorf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__)); extern _Float64 __floorf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__));


// Two _Float64 operands; no attribute.
extern _Float64 fmodf64 (_Float64 __x, _Float64 __y) noexcept (true); extern _Float64 __fmodf64 (_Float64 __x, _Float64 __y) noexcept (true);
// _Float64 prototypes for copysign and nan (plus double-underscore twins), both
// noexcept(true). copysignf64 is marked __attribute__((__const__)); nanf64 takes a
// const char* tag argument and has no such attribute.
// The linemarker below resets the reported position to line 198 of bits/mathcalls.h.
# 198 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern _Float64 copysignf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __copysignf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 nanf64 (const char *__tagb) noexcept (true); extern _Float64 __nanf64 (const char *__tagb) noexcept (true);
// _Float64 prototypes for j0, j1, jn, y0, y1, yn, erf, erfc, lgamma and tgamma, each
// with a double-underscore-prefixed twin and declared noexcept(true).
// Parameters are unnamed in this group; jn and yn take (int, _Float64).
// The linemarker below resets the reported position to line 220 of bits/mathcalls.h.
# 220 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern _Float64 j0f64 (_Float64) noexcept (true); extern _Float64 __j0f64 (_Float64) noexcept (true);
extern _Float64 j1f64 (_Float64) noexcept (true); extern _Float64 __j1f64 (_Float64) noexcept (true);
extern _Float64 jnf64 (int, _Float64) noexcept (true); extern _Float64 __jnf64 (int, _Float64) noexcept (true);
extern _Float64 y0f64 (_Float64) noexcept (true); extern _Float64 __y0f64 (_Float64) noexcept (true);
extern _Float64 y1f64 (_Float64) noexcept (true); extern _Float64 __y1f64 (_Float64) noexcept (true);
extern _Float64 ynf64 (int, _Float64) noexcept (true); extern _Float64 __ynf64 (int, _Float64) noexcept (true);


 extern _Float64 erff64 (_Float64) noexcept (true); extern _Float64 __erff64 (_Float64) noexcept (true);
 extern _Float64 erfcf64 (_Float64) noexcept (true); extern _Float64 __erfcf64 (_Float64) noexcept (true);
extern _Float64 lgammaf64 (_Float64) noexcept (true); extern _Float64 __lgammaf64 (_Float64) noexcept (true);


extern _Float64 tgammaf64 (_Float64) noexcept (true); extern _Float64 __tgammaf64 (_Float64) noexcept (true);
// _Float64 prototypes for lgamma_r, rint, nextafter, nextdown, nextup, remainder,
// scalbn, ilogb, llogb, scalbln, nearbyint, round, trunc and remquo, each with a
// double-underscore-prefixed twin and declared noexcept(true).
// Return types differ from _Float64 for ilogbf64 (int) and llogbf64 (long int).
// Only roundf64 and truncf64 carry __attribute__((__const__)).
// lgammaf64_r and remquof64 take an extra int* parameter (__signgamp / __quo).
// The linemarker below resets the reported position to line 252 of bits/mathcalls.h.
# 252 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern _Float64 lgammaf64_r (_Float64, int *__signgamp) noexcept (true); extern _Float64 __lgammaf64_r (_Float64, int *__signgamp) noexcept (true);


extern _Float64 rintf64 (_Float64 __x) noexcept (true); extern _Float64 __rintf64 (_Float64 __x) noexcept (true);


extern _Float64 nextafterf64 (_Float64 __x, _Float64 __y) noexcept (true); extern _Float64 __nextafterf64 (_Float64 __x, _Float64 __y) noexcept (true);


extern _Float64 nextdownf64 (_Float64 __x) noexcept (true); extern _Float64 __nextdownf64 (_Float64 __x) noexcept (true);

extern _Float64 nextupf64 (_Float64 __x) noexcept (true); extern _Float64 __nextupf64 (_Float64 __x) noexcept (true);


extern _Float64 remainderf64 (_Float64 __x, _Float64 __y) noexcept (true); extern _Float64 __remainderf64 (_Float64 __x, _Float64 __y) noexcept (true);


extern _Float64 scalbnf64 (_Float64 __x, int __n) noexcept (true); extern _Float64 __scalbnf64 (_Float64 __x, int __n) noexcept (true);


extern int ilogbf64 (_Float64 __x) noexcept (true); extern int __ilogbf64 (_Float64 __x) noexcept (true);


extern long int llogbf64 (_Float64 __x) noexcept (true); extern long int __llogbf64 (_Float64 __x) noexcept (true);


extern _Float64 scalblnf64 (_Float64 __x, long int __n) noexcept (true); extern _Float64 __scalblnf64 (_Float64 __x, long int __n) noexcept (true);


extern _Float64 nearbyintf64 (_Float64 __x) noexcept (true); extern _Float64 __nearbyintf64 (_Float64 __x) noexcept (true);


extern _Float64 roundf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__)); extern _Float64 __roundf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__));


extern _Float64 truncf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__)); extern _Float64 __truncf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__));


extern _Float64 remquof64 (_Float64 __x, _Float64 __y, int *__quo) noexcept (true); extern _Float64 __remquof64 (_Float64 __x, _Float64 __y, int *__quo) noexcept (true);


// _Float64 prototypes for lrint, llrint, lround and llround (plus double-underscore
// twins), all noexcept(true). lrint/lround return long int; llrint/llround return
// long long int, and each of those lines is preceded by an __extension__ marker.
extern long int lrintf64 (_Float64 __x) noexcept (true); extern long int __lrintf64 (_Float64 __x) noexcept (true);
__extension__
extern long long int llrintf64 (_Float64 __x) noexcept (true); extern long long int __llrintf64 (_Float64 __x) noexcept (true);


extern long int lroundf64 (_Float64 __x) noexcept (true); extern long int __lroundf64 (_Float64 __x) noexcept (true);
__extension__
extern long long int llroundf64 (_Float64 __x) noexcept (true); extern long long int __llroundf64 (_Float64 __x) noexcept (true);


// _Float64 prototypes for fdim, fmax, fmin, fma and roundeven, each with a
// double-underscore-prefixed twin and declared noexcept(true).
// fmax, fmin and roundeven carry __attribute__((__const__)); fdim and fma do not.
extern _Float64 fdimf64 (_Float64 __x, _Float64 __y) noexcept (true); extern _Float64 __fdimf64 (_Float64 __x, _Float64 __y) noexcept (true);


extern _Float64 fmaxf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fmaxf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 fminf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fminf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 fmaf64 (_Float64 __x, _Float64 __y, _Float64 __z) noexcept (true); extern _Float64 __fmaf64 (_Float64 __x, _Float64 __y, _Float64 __z) noexcept (true);


extern _Float64 roundevenf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__)); extern _Float64 __roundevenf64 (_Float64 __x) noexcept (true) __attribute__ ((__const__));


// _Float64 prototypes for fromfp, ufromfp, fromfpx and ufromfpx, each taking
// (_Float64 __x, int __round, unsigned int __width) and returning __intmax_t
// (signed forms) or __uintmax_t (u-prefixed forms), with double-underscore twins.
// canonicalizef64 returns int, takes a destination and a const source pointer, and
// is declared without a double-underscore twin. All are noexcept(true).
extern __intmax_t fromfpf64 (_Float64 __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpf64 (_Float64 __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpf64 (_Float64 __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpf64 (_Float64 __x, int __round, unsigned int __width) noexcept (true);


extern __intmax_t fromfpxf64 (_Float64 __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpxf64 (_Float64 __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpxf64 (_Float64 __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpxf64 (_Float64 __x, int __round, unsigned int __width) noexcept (true);


extern int canonicalizef64 (_Float64 *__cx, const _Float64 *__x) noexcept (true);


// _Float64 prototypes for the two-argument max/min family: fmaxmag, fminmag,
// fmaximum, fminimum, fmaximum_num, fminimum_num, fmaximum_mag, fminimum_mag,
// fmaximum_mag_num and fminimum_mag_num. Every entry has a double-underscore twin,
// is noexcept(true), and carries __attribute__((__const__)).
extern _Float64 fmaxmagf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fmaxmagf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 fminmagf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fminmagf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 fmaximumf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fmaximumf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 fminimumf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fminimumf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 fmaximum_numf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fmaximum_numf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 fminimum_numf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fminimum_numf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 fmaximum_magf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fmaximum_magf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 fminimum_magf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fminimum_magf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 fmaximum_mag_numf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fmaximum_mag_numf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


extern _Float64 fminimum_mag_numf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__)); extern _Float64 __fminimum_mag_numf64 (_Float64 __x, _Float64 __y) noexcept (true) __attribute__ ((__const__));


// _Float64 prototypes for totalorder, totalordermag, getpayload, setpayload and
// setpayloadsig; all are noexcept(true) and take their operands by pointer.
// totalorderf64 / totalordermagf64 return int, take two pointers-to-const, carry
// __attribute__((__pure__)) (on a continuation line), and have no double-underscore twin.
// getpayloadf64 returns _Float64 and is the only entry here with a twin;
// setpayloadf64 / setpayloadsigf64 return int and take a non-const destination pointer.
extern int totalorderf64 (const _Float64 *__x, const _Float64 *__y) noexcept (true)

     __attribute__ ((__pure__));


extern int totalordermagf64 (const _Float64 *__x, const _Float64 *__y) noexcept (true)

     __attribute__ ((__pure__));


extern _Float64 getpayloadf64 (const _Float64 *__x) noexcept (true); extern _Float64 __getpayloadf64 (const _Float64 *__x) noexcept (true);


extern int setpayloadf64 (_Float64 *__x, _Float64 __payload) noexcept (true);


extern int setpayloadsigf64 (_Float64 *__x, _Float64 __payload) noexcept (true);
# 468 "/usr/include/math.h" 2 3 4
# 501 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 1 3 4
// _Float32x prototypes for acos, asin, atan, atan2, cos, sin, tan, cosh, sinh, tanh,
// sincos, acosh, asinh and atanh. Each public name shares its line with a
// double-underscore-prefixed twin of the same signature; all are noexcept(true).
// sincosf32x is the only entry returning void; it takes two non-const pointer
// parameters (__sinx, __cosx) in addition to __x.
// The linemarker below sets the reported position to line 53 of bits/mathcalls.h.
# 53 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
 extern _Float32x acosf32x (_Float32x __x) noexcept (true); extern _Float32x __acosf32x (_Float32x __x) noexcept (true);

 extern _Float32x asinf32x (_Float32x __x) noexcept (true); extern _Float32x __asinf32x (_Float32x __x) noexcept (true);

 extern _Float32x atanf32x (_Float32x __x) noexcept (true); extern _Float32x __atanf32x (_Float32x __x) noexcept (true);

 extern _Float32x atan2f32x (_Float32x __y, _Float32x __x) noexcept (true); extern _Float32x __atan2f32x (_Float32x __y, _Float32x __x) noexcept (true);


 extern _Float32x cosf32x (_Float32x __x) noexcept (true); extern _Float32x __cosf32x (_Float32x __x) noexcept (true);

 extern _Float32x sinf32x (_Float32x __x) noexcept (true); extern _Float32x __sinf32x (_Float32x __x) noexcept (true);

 extern _Float32x tanf32x (_Float32x __x) noexcept (true); extern _Float32x __tanf32x (_Float32x __x) noexcept (true);


 extern _Float32x coshf32x (_Float32x __x) noexcept (true); extern _Float32x __coshf32x (_Float32x __x) noexcept (true);

 extern _Float32x sinhf32x (_Float32x __x) noexcept (true); extern _Float32x __sinhf32x (_Float32x __x) noexcept (true);

 extern _Float32x tanhf32x (_Float32x __x) noexcept (true); extern _Float32x __tanhf32x (_Float32x __x) noexcept (true);


 extern void sincosf32x (_Float32x __x, _Float32x *__sinx, _Float32x *__cosx) noexcept (true); extern void __sincosf32x (_Float32x __x, _Float32x *__sinx, _Float32x *__cosx) noexcept (true);


 extern _Float32x acoshf32x (_Float32x __x) noexcept (true); extern _Float32x __acoshf32x (_Float32x __x) noexcept (true);

 extern _Float32x asinhf32x (_Float32x __x) noexcept (true); extern _Float32x __asinhf32x (_Float32x __x) noexcept (true);

 extern _Float32x atanhf32x (_Float32x __x) noexcept (true); extern _Float32x __atanhf32x (_Float32x __x) noexcept (true);


 // _Float32x prototypes for exp, frexp, ldexp, log, log10, modf, exp10, expm1, log1p,
 // logb, exp2, log2 and pow, each with a double-underscore-prefixed twin and
 // declared noexcept(true). frexpf32x takes an int* (__exponent) while ldexpf32x
 // takes a plain int of the same name.
 // Note: only the __modff32x twin carries __nonnull__ on argument 2 (__iptr); the
 // public modff32x declaration does not.
 extern _Float32x expf32x (_Float32x __x) noexcept (true); extern _Float32x __expf32x (_Float32x __x) noexcept (true);


extern _Float32x frexpf32x (_Float32x __x, int *__exponent) noexcept (true); extern _Float32x __frexpf32x (_Float32x __x, int *__exponent) noexcept (true);


extern _Float32x ldexpf32x (_Float32x __x, int __exponent) noexcept (true); extern _Float32x __ldexpf32x (_Float32x __x, int __exponent) noexcept (true);


 extern _Float32x logf32x (_Float32x __x) noexcept (true); extern _Float32x __logf32x (_Float32x __x) noexcept (true);


 extern _Float32x log10f32x (_Float32x __x) noexcept (true); extern _Float32x __log10f32x (_Float32x __x) noexcept (true);


extern _Float32x modff32x (_Float32x __x, _Float32x *__iptr) noexcept (true); extern _Float32x __modff32x (_Float32x __x, _Float32x *__iptr) noexcept (true) __attribute__ ((__nonnull__ (2)));


 extern _Float32x exp10f32x (_Float32x __x) noexcept (true); extern _Float32x __exp10f32x (_Float32x __x) noexcept (true);


 extern _Float32x expm1f32x (_Float32x __x) noexcept (true); extern _Float32x __expm1f32x (_Float32x __x) noexcept (true);


 extern _Float32x log1pf32x (_Float32x __x) noexcept (true); extern _Float32x __log1pf32x (_Float32x __x) noexcept (true);


extern _Float32x logbf32x (_Float32x __x) noexcept (true); extern _Float32x __logbf32x (_Float32x __x) noexcept (true);


 extern _Float32x exp2f32x (_Float32x __x) noexcept (true); extern _Float32x __exp2f32x (_Float32x __x) noexcept (true);


 extern _Float32x log2f32x (_Float32x __x) noexcept (true); extern _Float32x __log2f32x (_Float32x __x) noexcept (true);


 extern _Float32x powf32x (_Float32x __x, _Float32x __y) noexcept (true); extern _Float32x __powf32x (_Float32x __x, _Float32x __y) noexcept (true);


// _Float32x prototypes for sqrt, hypot, cbrt, ceil, fabs, floor and fmod, each paired
// with a double-underscore-prefixed twin and declared noexcept(true).
// ceil, fabs and floor (and their twins) additionally carry __attribute__((__const__));
// sqrt, hypot, cbrt and fmod do not.
extern _Float32x sqrtf32x (_Float32x __x) noexcept (true); extern _Float32x __sqrtf32x (_Float32x __x) noexcept (true);


 extern _Float32x hypotf32x (_Float32x __x, _Float32x __y) noexcept (true); extern _Float32x __hypotf32x (_Float32x __x, _Float32x __y) noexcept (true);


 extern _Float32x cbrtf32x (_Float32x __x) noexcept (true); extern _Float32x __cbrtf32x (_Float32x __x) noexcept (true);


extern _Float32x ceilf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__)); extern _Float32x __ceilf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__));


extern _Float32x fabsf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fabsf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__));


extern _Float32x floorf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__)); extern _Float32x __floorf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__));


extern _Float32x fmodf32x (_Float32x __x, _Float32x __y) noexcept (true); extern _Float32x __fmodf32x (_Float32x __x, _Float32x __y) noexcept (true);
// _Float32x prototypes for copysign and nan (plus double-underscore twins), both
// noexcept(true). copysignf32x is marked __attribute__((__const__)); nanf32x takes a
// const char* tag argument and has no such attribute.
// The linemarker below resets the reported position to line 198 of bits/mathcalls.h.
# 198 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern _Float32x copysignf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __copysignf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x nanf32x (const char *__tagb) noexcept (true); extern _Float32x __nanf32x (const char *__tagb) noexcept (true);
// _Float32x prototypes for j0, j1, jn, y0, y1, yn, erf, erfc, lgamma and tgamma, each
// with a double-underscore-prefixed twin and declared noexcept(true).
// Parameters are unnamed in this group; jn and yn take (int, _Float32x).
// The linemarker below resets the reported position to line 220 of bits/mathcalls.h.
# 220 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern _Float32x j0f32x (_Float32x) noexcept (true); extern _Float32x __j0f32x (_Float32x) noexcept (true);
extern _Float32x j1f32x (_Float32x) noexcept (true); extern _Float32x __j1f32x (_Float32x) noexcept (true);
extern _Float32x jnf32x (int, _Float32x) noexcept (true); extern _Float32x __jnf32x (int, _Float32x) noexcept (true);
extern _Float32x y0f32x (_Float32x) noexcept (true); extern _Float32x __y0f32x (_Float32x) noexcept (true);
extern _Float32x y1f32x (_Float32x) noexcept (true); extern _Float32x __y1f32x (_Float32x) noexcept (true);
extern _Float32x ynf32x (int, _Float32x) noexcept (true); extern _Float32x __ynf32x (int, _Float32x) noexcept (true);


 extern _Float32x erff32x (_Float32x) noexcept (true); extern _Float32x __erff32x (_Float32x) noexcept (true);
 extern _Float32x erfcf32x (_Float32x) noexcept (true); extern _Float32x __erfcf32x (_Float32x) noexcept (true);
extern _Float32x lgammaf32x (_Float32x) noexcept (true); extern _Float32x __lgammaf32x (_Float32x) noexcept (true);


extern _Float32x tgammaf32x (_Float32x) noexcept (true); extern _Float32x __tgammaf32x (_Float32x) noexcept (true);
// _Float32x prototypes for lgamma_r, rint, nextafter, nextdown, nextup, remainder,
// scalbn, ilogb, llogb, scalbln, nearbyint, round, trunc and remquo, each with a
// double-underscore-prefixed twin and declared noexcept(true).
// Return types differ from _Float32x for ilogbf32x (int) and llogbf32x (long int).
// Only roundf32x and truncf32x carry __attribute__((__const__)).
// lgammaf32x_r and remquof32x take an extra int* parameter (__signgamp / __quo).
// The linemarker below resets the reported position to line 252 of bits/mathcalls.h.
# 252 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern _Float32x lgammaf32x_r (_Float32x, int *__signgamp) noexcept (true); extern _Float32x __lgammaf32x_r (_Float32x, int *__signgamp) noexcept (true);


extern _Float32x rintf32x (_Float32x __x) noexcept (true); extern _Float32x __rintf32x (_Float32x __x) noexcept (true);


extern _Float32x nextafterf32x (_Float32x __x, _Float32x __y) noexcept (true); extern _Float32x __nextafterf32x (_Float32x __x, _Float32x __y) noexcept (true);


extern _Float32x nextdownf32x (_Float32x __x) noexcept (true); extern _Float32x __nextdownf32x (_Float32x __x) noexcept (true);

extern _Float32x nextupf32x (_Float32x __x) noexcept (true); extern _Float32x __nextupf32x (_Float32x __x) noexcept (true);


extern _Float32x remainderf32x (_Float32x __x, _Float32x __y) noexcept (true); extern _Float32x __remainderf32x (_Float32x __x, _Float32x __y) noexcept (true);


extern _Float32x scalbnf32x (_Float32x __x, int __n) noexcept (true); extern _Float32x __scalbnf32x (_Float32x __x, int __n) noexcept (true);


extern int ilogbf32x (_Float32x __x) noexcept (true); extern int __ilogbf32x (_Float32x __x) noexcept (true);


extern long int llogbf32x (_Float32x __x) noexcept (true); extern long int __llogbf32x (_Float32x __x) noexcept (true);


extern _Float32x scalblnf32x (_Float32x __x, long int __n) noexcept (true); extern _Float32x __scalblnf32x (_Float32x __x, long int __n) noexcept (true);


extern _Float32x nearbyintf32x (_Float32x __x) noexcept (true); extern _Float32x __nearbyintf32x (_Float32x __x) noexcept (true);


extern _Float32x roundf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__)); extern _Float32x __roundf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__));


extern _Float32x truncf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__)); extern _Float32x __truncf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__));


extern _Float32x remquof32x (_Float32x __x, _Float32x __y, int *__quo) noexcept (true); extern _Float32x __remquof32x (_Float32x __x, _Float32x __y, int *__quo) noexcept (true);


// _Float32x prototypes for lrint, llrint, lround and llround (plus double-underscore
// twins), all noexcept(true). lrint/lround return long int; llrint/llround return
// long long int, and each of those lines is preceded by an __extension__ marker.
extern long int lrintf32x (_Float32x __x) noexcept (true); extern long int __lrintf32x (_Float32x __x) noexcept (true);
__extension__
extern long long int llrintf32x (_Float32x __x) noexcept (true); extern long long int __llrintf32x (_Float32x __x) noexcept (true);


extern long int lroundf32x (_Float32x __x) noexcept (true); extern long int __lroundf32x (_Float32x __x) noexcept (true);
__extension__
extern long long int llroundf32x (_Float32x __x) noexcept (true); extern long long int __llroundf32x (_Float32x __x) noexcept (true);


// _Float32x prototypes for fdim, fmax, fmin, fma and roundeven, each with a
// double-underscore-prefixed twin and declared noexcept(true).
// fmax, fmin and roundeven carry __attribute__((__const__)); fdim and fma do not.
extern _Float32x fdimf32x (_Float32x __x, _Float32x __y) noexcept (true); extern _Float32x __fdimf32x (_Float32x __x, _Float32x __y) noexcept (true);


extern _Float32x fmaxf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fmaxf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x fminf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fminf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x fmaf32x (_Float32x __x, _Float32x __y, _Float32x __z) noexcept (true); extern _Float32x __fmaf32x (_Float32x __x, _Float32x __y, _Float32x __z) noexcept (true);


extern _Float32x roundevenf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__)); extern _Float32x __roundevenf32x (_Float32x __x) noexcept (true) __attribute__ ((__const__));


// _Float32x prototypes for fromfp, ufromfp, fromfpx and ufromfpx, each taking
// (_Float32x __x, int __round, unsigned int __width) and returning __intmax_t
// (signed forms) or __uintmax_t (u-prefixed forms), with double-underscore twins.
// canonicalizef32x returns int, takes a destination and a const source pointer, and
// is declared without a double-underscore twin. All are noexcept(true).
extern __intmax_t fromfpf32x (_Float32x __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpf32x (_Float32x __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpf32x (_Float32x __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpf32x (_Float32x __x, int __round, unsigned int __width) noexcept (true);


extern __intmax_t fromfpxf32x (_Float32x __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpxf32x (_Float32x __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpxf32x (_Float32x __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpxf32x (_Float32x __x, int __round, unsigned int __width) noexcept (true);


extern int canonicalizef32x (_Float32x *__cx, const _Float32x *__x) noexcept (true);


// _Float32x prototypes for the two-argument max/min family: fmaxmag, fminmag,
// fmaximum, fminimum, fmaximum_num, fminimum_num, fmaximum_mag, fminimum_mag,
// fmaximum_mag_num and fminimum_mag_num. Every entry has a double-underscore twin,
// is noexcept(true), and carries __attribute__((__const__)).
extern _Float32x fmaxmagf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fmaxmagf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x fminmagf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fminmagf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x fmaximumf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fmaximumf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x fminimumf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fminimumf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x fmaximum_numf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fmaximum_numf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x fminimum_numf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fminimum_numf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x fmaximum_magf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fmaximum_magf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x fminimum_magf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fminimum_magf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x fmaximum_mag_numf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fmaximum_mag_numf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


extern _Float32x fminimum_mag_numf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__)); extern _Float32x __fminimum_mag_numf32x (_Float32x __x, _Float32x __y) noexcept (true) __attribute__ ((__const__));


// _Float32x prototypes for totalorder, totalordermag, getpayload, setpayload and
// setpayloadsig; all are noexcept(true) and take their operands by pointer.
// totalorderf32x / totalordermagf32x return int, take two pointers-to-const, carry
// __attribute__((__pure__)) (on a continuation line), and have no double-underscore twin.
// getpayloadf32x returns _Float32x and is the only entry here with a twin;
// setpayloadf32x / setpayloadsigf32x return int and take a non-const destination pointer.
extern int totalorderf32x (const _Float32x *__x, const _Float32x *__y) noexcept (true)

     __attribute__ ((__pure__));


extern int totalordermagf32x (const _Float32x *__x, const _Float32x *__y) noexcept (true)

     __attribute__ ((__pure__));


extern _Float32x getpayloadf32x (const _Float32x *__x) noexcept (true); extern _Float32x __getpayloadf32x (const _Float32x *__x) noexcept (true);


extern int setpayloadf32x (_Float32x *__x, _Float32x __payload) noexcept (true);


extern int setpayloadsigf32x (_Float32x *__x, _Float32x __payload) noexcept (true);
# 502 "/usr/include/math.h" 2 3 4
# 518 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 1 3 4
// _Float64x prototypes for acos, asin, atan, atan2, cos, sin, tan, cosh, sinh, tanh,
// sincos, acosh, asinh and atanh. Each public name shares its line with a
// double-underscore-prefixed twin of the same signature; all are noexcept(true).
// sincosf64x is the only entry returning void; it takes two non-const pointer
// parameters (__sinx, __cosx) in addition to __x.
// The linemarker below sets the reported position to line 53 of bits/mathcalls.h.
# 53 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
 extern _Float64x acosf64x (_Float64x __x) noexcept (true); extern _Float64x __acosf64x (_Float64x __x) noexcept (true);

 extern _Float64x asinf64x (_Float64x __x) noexcept (true); extern _Float64x __asinf64x (_Float64x __x) noexcept (true);

 extern _Float64x atanf64x (_Float64x __x) noexcept (true); extern _Float64x __atanf64x (_Float64x __x) noexcept (true);

 extern _Float64x atan2f64x (_Float64x __y, _Float64x __x) noexcept (true); extern _Float64x __atan2f64x (_Float64x __y, _Float64x __x) noexcept (true);


 extern _Float64x cosf64x (_Float64x __x) noexcept (true); extern _Float64x __cosf64x (_Float64x __x) noexcept (true);

 extern _Float64x sinf64x (_Float64x __x) noexcept (true); extern _Float64x __sinf64x (_Float64x __x) noexcept (true);

 extern _Float64x tanf64x (_Float64x __x) noexcept (true); extern _Float64x __tanf64x (_Float64x __x) noexcept (true);


 extern _Float64x coshf64x (_Float64x __x) noexcept (true); extern _Float64x __coshf64x (_Float64x __x) noexcept (true);

 extern _Float64x sinhf64x (_Float64x __x) noexcept (true); extern _Float64x __sinhf64x (_Float64x __x) noexcept (true);

 extern _Float64x tanhf64x (_Float64x __x) noexcept (true); extern _Float64x __tanhf64x (_Float64x __x) noexcept (true);


 extern void sincosf64x (_Float64x __x, _Float64x *__sinx, _Float64x *__cosx) noexcept (true); extern void __sincosf64x (_Float64x __x, _Float64x *__sinx, _Float64x *__cosx) noexcept (true);


 extern _Float64x acoshf64x (_Float64x __x) noexcept (true); extern _Float64x __acoshf64x (_Float64x __x) noexcept (true);

 extern _Float64x asinhf64x (_Float64x __x) noexcept (true); extern _Float64x __asinhf64x (_Float64x __x) noexcept (true);

 extern _Float64x atanhf64x (_Float64x __x) noexcept (true); extern _Float64x __atanhf64x (_Float64x __x) noexcept (true);


 // _Float64x prototypes for exp, frexp, ldexp, log, log10, modf, exp10, expm1, log1p,
 // logb, exp2, log2 and pow, each with a double-underscore-prefixed twin and
 // declared noexcept(true). frexpf64x takes an int* (__exponent) while ldexpf64x
 // takes a plain int of the same name.
 // Note: only the __modff64x twin carries __nonnull__ on argument 2 (__iptr); the
 // public modff64x declaration does not.
 extern _Float64x expf64x (_Float64x __x) noexcept (true); extern _Float64x __expf64x (_Float64x __x) noexcept (true);


extern _Float64x frexpf64x (_Float64x __x, int *__exponent) noexcept (true); extern _Float64x __frexpf64x (_Float64x __x, int *__exponent) noexcept (true);


extern _Float64x ldexpf64x (_Float64x __x, int __exponent) noexcept (true); extern _Float64x __ldexpf64x (_Float64x __x, int __exponent) noexcept (true);


 extern _Float64x logf64x (_Float64x __x) noexcept (true); extern _Float64x __logf64x (_Float64x __x) noexcept (true);


 extern _Float64x log10f64x (_Float64x __x) noexcept (true); extern _Float64x __log10f64x (_Float64x __x) noexcept (true);


extern _Float64x modff64x (_Float64x __x, _Float64x *__iptr) noexcept (true); extern _Float64x __modff64x (_Float64x __x, _Float64x *__iptr) noexcept (true) __attribute__ ((__nonnull__ (2)));


 extern _Float64x exp10f64x (_Float64x __x) noexcept (true); extern _Float64x __exp10f64x (_Float64x __x) noexcept (true);


 extern _Float64x expm1f64x (_Float64x __x) noexcept (true); extern _Float64x __expm1f64x (_Float64x __x) noexcept (true);


 extern _Float64x log1pf64x (_Float64x __x) noexcept (true); extern _Float64x __log1pf64x (_Float64x __x) noexcept (true);


extern _Float64x logbf64x (_Float64x __x) noexcept (true); extern _Float64x __logbf64x (_Float64x __x) noexcept (true);


 extern _Float64x exp2f64x (_Float64x __x) noexcept (true); extern _Float64x __exp2f64x (_Float64x __x) noexcept (true);


 extern _Float64x log2f64x (_Float64x __x) noexcept (true); extern _Float64x __log2f64x (_Float64x __x) noexcept (true);


 extern _Float64x powf64x (_Float64x __x, _Float64x __y) noexcept (true); extern _Float64x __powf64x (_Float64x __x, _Float64x __y) noexcept (true);


// _Float64x prototypes for sqrt, hypot, cbrt, ceil, fabs, floor and fmod, each paired
// with a double-underscore-prefixed twin and declared noexcept(true).
// ceil, fabs and floor (and their twins) additionally carry __attribute__((__const__));
// sqrt, hypot, cbrt and fmod do not.
extern _Float64x sqrtf64x (_Float64x __x) noexcept (true); extern _Float64x __sqrtf64x (_Float64x __x) noexcept (true);


 extern _Float64x hypotf64x (_Float64x __x, _Float64x __y) noexcept (true); extern _Float64x __hypotf64x (_Float64x __x, _Float64x __y) noexcept (true);


 extern _Float64x cbrtf64x (_Float64x __x) noexcept (true); extern _Float64x __cbrtf64x (_Float64x __x) noexcept (true);


extern _Float64x ceilf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__)); extern _Float64x __ceilf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__));


extern _Float64x fabsf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fabsf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__));


extern _Float64x floorf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__)); extern _Float64x __floorf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__));


extern _Float64x fmodf64x (_Float64x __x, _Float64x __y) noexcept (true); extern _Float64x __fmodf64x (_Float64x __x, _Float64x __y) noexcept (true);
// _Float64x prototypes for copysign and nan (plus double-underscore twins), both
// noexcept(true). copysignf64x is marked __attribute__((__const__)); nanf64x takes a
// const char* tag argument and has no such attribute.
// The linemarker below resets the reported position to line 198 of bits/mathcalls.h.
# 198 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern _Float64x copysignf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __copysignf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x nanf64x (const char *__tagb) noexcept (true); extern _Float64x __nanf64x (const char *__tagb) noexcept (true);
// _Float64x prototypes for j0, j1, jn, y0, y1, yn, erf, erfc, lgamma and tgamma, each
// with a double-underscore-prefixed twin and declared noexcept(true).
// Parameters are unnamed in this group; jn and yn take (int, _Float64x).
// The linemarker below resets the reported position to line 220 of bits/mathcalls.h.
# 220 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern _Float64x j0f64x (_Float64x) noexcept (true); extern _Float64x __j0f64x (_Float64x) noexcept (true);
extern _Float64x j1f64x (_Float64x) noexcept (true); extern _Float64x __j1f64x (_Float64x) noexcept (true);
extern _Float64x jnf64x (int, _Float64x) noexcept (true); extern _Float64x __jnf64x (int, _Float64x) noexcept (true);
extern _Float64x y0f64x (_Float64x) noexcept (true); extern _Float64x __y0f64x (_Float64x) noexcept (true);
extern _Float64x y1f64x (_Float64x) noexcept (true); extern _Float64x __y1f64x (_Float64x) noexcept (true);
extern _Float64x ynf64x (int, _Float64x) noexcept (true); extern _Float64x __ynf64x (int, _Float64x) noexcept (true);


 extern _Float64x erff64x (_Float64x) noexcept (true); extern _Float64x __erff64x (_Float64x) noexcept (true);
 extern _Float64x erfcf64x (_Float64x) noexcept (true); extern _Float64x __erfcf64x (_Float64x) noexcept (true);
extern _Float64x lgammaf64x (_Float64x) noexcept (true); extern _Float64x __lgammaf64x (_Float64x) noexcept (true);


extern _Float64x tgammaf64x (_Float64x) noexcept (true); extern _Float64x __tgammaf64x (_Float64x) noexcept (true);
// _Float64x prototypes for lgamma_r, rint, nextafter, nextdown, nextup, remainder,
// scalbn, ilogb, llogb, scalbln, nearbyint, round, trunc and remquo, each with a
// double-underscore-prefixed twin and declared noexcept(true).
// Return types differ from _Float64x for ilogbf64x (int) and llogbf64x (long int).
// Only roundf64x and truncf64x carry __attribute__((__const__)).
// lgammaf64x_r and remquof64x take an extra int* parameter (__signgamp / __quo).
// The linemarker below resets the reported position to line 252 of bits/mathcalls.h.
# 252 "/usr/include/x86_64-linux-gnu/bits/mathcalls.h" 3 4
extern _Float64x lgammaf64x_r (_Float64x, int *__signgamp) noexcept (true); extern _Float64x __lgammaf64x_r (_Float64x, int *__signgamp) noexcept (true);


extern _Float64x rintf64x (_Float64x __x) noexcept (true); extern _Float64x __rintf64x (_Float64x __x) noexcept (true);


extern _Float64x nextafterf64x (_Float64x __x, _Float64x __y) noexcept (true); extern _Float64x __nextafterf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float64x nextdownf64x (_Float64x __x) noexcept (true); extern _Float64x __nextdownf64x (_Float64x __x) noexcept (true);

extern _Float64x nextupf64x (_Float64x __x) noexcept (true); extern _Float64x __nextupf64x (_Float64x __x) noexcept (true);


extern _Float64x remainderf64x (_Float64x __x, _Float64x __y) noexcept (true); extern _Float64x __remainderf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float64x scalbnf64x (_Float64x __x, int __n) noexcept (true); extern _Float64x __scalbnf64x (_Float64x __x, int __n) noexcept (true);


extern int ilogbf64x (_Float64x __x) noexcept (true); extern int __ilogbf64x (_Float64x __x) noexcept (true);


extern long int llogbf64x (_Float64x __x) noexcept (true); extern long int __llogbf64x (_Float64x __x) noexcept (true);


extern _Float64x scalblnf64x (_Float64x __x, long int __n) noexcept (true); extern _Float64x __scalblnf64x (_Float64x __x, long int __n) noexcept (true);


extern _Float64x nearbyintf64x (_Float64x __x) noexcept (true); extern _Float64x __nearbyintf64x (_Float64x __x) noexcept (true);


extern _Float64x roundf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__)); extern _Float64x __roundf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__));


extern _Float64x truncf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__)); extern _Float64x __truncf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__));


extern _Float64x remquof64x (_Float64x __x, _Float64x __y, int *__quo) noexcept (true); extern _Float64x __remquof64x (_Float64x __x, _Float64x __y, int *__quo) noexcept (true);


// _Float64x prototypes for lrint, llrint, lround and llround (plus double-underscore
// twins), all noexcept(true). lrint/lround return long int; llrint/llround return
// long long int, and each of those lines is preceded by an __extension__ marker.
extern long int lrintf64x (_Float64x __x) noexcept (true); extern long int __lrintf64x (_Float64x __x) noexcept (true);
__extension__
extern long long int llrintf64x (_Float64x __x) noexcept (true); extern long long int __llrintf64x (_Float64x __x) noexcept (true);


extern long int lroundf64x (_Float64x __x) noexcept (true); extern long int __lroundf64x (_Float64x __x) noexcept (true);
__extension__
extern long long int llroundf64x (_Float64x __x) noexcept (true); extern long long int __llroundf64x (_Float64x __x) noexcept (true);


// _Float64x prototypes for fdim, fmax, fmin, fma and roundeven, each with a
// double-underscore-prefixed twin and declared noexcept(true).
// fmax, fmin and roundeven carry __attribute__((__const__)); fdim and fma do not.
extern _Float64x fdimf64x (_Float64x __x, _Float64x __y) noexcept (true); extern _Float64x __fdimf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float64x fmaxf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fmaxf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x fminf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fminf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x fmaf64x (_Float64x __x, _Float64x __y, _Float64x __z) noexcept (true); extern _Float64x __fmaf64x (_Float64x __x, _Float64x __y, _Float64x __z) noexcept (true);


extern _Float64x roundevenf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__)); extern _Float64x __roundevenf64x (_Float64x __x) noexcept (true) __attribute__ ((__const__));


extern __intmax_t fromfpf64x (_Float64x __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpf64x (_Float64x __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpf64x (_Float64x __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpf64x (_Float64x __x, int __round, unsigned int __width) noexcept (true);


extern __intmax_t fromfpxf64x (_Float64x __x, int __round, unsigned int __width) noexcept (true); extern __intmax_t __fromfpxf64x (_Float64x __x, int __round, unsigned int __width) noexcept (true);


extern __uintmax_t ufromfpxf64x (_Float64x __x, int __round, unsigned int __width) noexcept (true); extern __uintmax_t __ufromfpxf64x (_Float64x __x, int __round, unsigned int __width) noexcept (true);


extern int canonicalizef64x (_Float64x *__cx, const _Float64x *__x) noexcept (true);


extern _Float64x fmaxmagf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fmaxmagf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x fminmagf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fminmagf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x fmaximumf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fmaximumf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x fminimumf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fminimumf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x fmaximum_numf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fmaximum_numf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x fminimum_numf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fminimum_numf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x fmaximum_magf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fmaximum_magf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x fminimum_magf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fminimum_magf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x fmaximum_mag_numf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fmaximum_mag_numf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern _Float64x fminimum_mag_numf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__)); extern _Float64x __fminimum_mag_numf64x (_Float64x __x, _Float64x __y) noexcept (true) __attribute__ ((__const__));


extern int totalorderf64x (const _Float64x *__x, const _Float64x *__y) noexcept (true)

     __attribute__ ((__pure__));


extern int totalordermagf64x (const _Float64x *__x, const _Float64x *__y) noexcept (true)

     __attribute__ ((__pure__));


extern _Float64x getpayloadf64x (const _Float64x *__x) noexcept (true); extern _Float64x __getpayloadf64x (const _Float64x *__x) noexcept (true);


extern int setpayloadf64x (_Float64x *__x, _Float64x __payload) noexcept (true);


extern int setpayloadsigf64x (_Float64x *__x, _Float64x __payload) noexcept (true);
# 519 "/usr/include/math.h" 2 3 4
# 566 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 3 4
extern float fadd (double __x, double __y) noexcept (true);


extern float fdiv (double __x, double __y) noexcept (true);


extern float ffma (double __x, double __y, double __z) noexcept (true);


extern float fmul (double __x, double __y) noexcept (true);


extern float fsqrt (double __x) noexcept (true);


extern float fsub (double __x, double __y) noexcept (true);
# 567 "/usr/include/math.h" 2 3 4
# 587 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 3 4
extern float faddl (long double __x, long double __y) noexcept (true);


extern float fdivl (long double __x, long double __y) noexcept (true);


extern float ffmal (long double __x, long double __y, long double __z) noexcept (true);


extern float fmull (long double __x, long double __y) noexcept (true);


extern float fsqrtl (long double __x) noexcept (true);


extern float fsubl (long double __x, long double __y) noexcept (true);
# 588 "/usr/include/math.h" 2 3 4
# 616 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 3 4
extern double daddl (long double __x, long double __y) noexcept (true);


extern double ddivl (long double __x, long double __y) noexcept (true);


extern double dfmal (long double __x, long double __y, long double __z) noexcept (true);


extern double dmull (long double __x, long double __y) noexcept (true);


extern double dsqrtl (long double __x) noexcept (true);


extern double dsubl (long double __x, long double __y) noexcept (true);
# 617 "/usr/include/math.h" 2 3 4
# 697 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 3 4
extern _Float32 f32addf32x (_Float32x __x, _Float32x __y) noexcept (true);


extern _Float32 f32divf32x (_Float32x __x, _Float32x __y) noexcept (true);


extern _Float32 f32fmaf32x (_Float32x __x, _Float32x __y, _Float32x __z) noexcept (true);


extern _Float32 f32mulf32x (_Float32x __x, _Float32x __y) noexcept (true);


extern _Float32 f32sqrtf32x (_Float32x __x) noexcept (true);


extern _Float32 f32subf32x (_Float32x __x, _Float32x __y) noexcept (true);
# 698 "/usr/include/math.h" 2 3 4
# 707 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 3 4
extern _Float32 f32addf64 (_Float64 __x, _Float64 __y) noexcept (true);


extern _Float32 f32divf64 (_Float64 __x, _Float64 __y) noexcept (true);


extern _Float32 f32fmaf64 (_Float64 __x, _Float64 __y, _Float64 __z) noexcept (true);


extern _Float32 f32mulf64 (_Float64 __x, _Float64 __y) noexcept (true);


extern _Float32 f32sqrtf64 (_Float64 __x) noexcept (true);


extern _Float32 f32subf64 (_Float64 __x, _Float64 __y) noexcept (true);
# 708 "/usr/include/math.h" 2 3 4
# 717 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 3 4
extern _Float32 f32addf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float32 f32divf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float32 f32fmaf64x (_Float64x __x, _Float64x __y, _Float64x __z) noexcept (true);


extern _Float32 f32mulf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float32 f32sqrtf64x (_Float64x __x) noexcept (true);


extern _Float32 f32subf64x (_Float64x __x, _Float64x __y) noexcept (true);
# 718 "/usr/include/math.h" 2 3 4
# 747 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 3 4
extern _Float32x f32xaddf64 (_Float64 __x, _Float64 __y) noexcept (true);


extern _Float32x f32xdivf64 (_Float64 __x, _Float64 __y) noexcept (true);


extern _Float32x f32xfmaf64 (_Float64 __x, _Float64 __y, _Float64 __z) noexcept (true);


extern _Float32x f32xmulf64 (_Float64 __x, _Float64 __y) noexcept (true);


extern _Float32x f32xsqrtf64 (_Float64 __x) noexcept (true);


extern _Float32x f32xsubf64 (_Float64 __x, _Float64 __y) noexcept (true);
# 748 "/usr/include/math.h" 2 3 4
# 757 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 3 4
extern _Float32x f32xaddf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float32x f32xdivf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float32x f32xfmaf64x (_Float64x __x, _Float64x __y, _Float64x __z) noexcept (true);


extern _Float32x f32xmulf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float32x f32xsqrtf64x (_Float64x __x) noexcept (true);


extern _Float32x f32xsubf64x (_Float64x __x, _Float64x __y) noexcept (true);
# 758 "/usr/include/math.h" 2 3 4
# 787 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/mathcalls-narrow.h" 3 4
extern _Float64 f64addf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float64 f64divf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float64 f64fmaf64x (_Float64x __x, _Float64x __y, _Float64x __z) noexcept (true);


extern _Float64 f64mulf64x (_Float64x __x, _Float64x __y) noexcept (true);


extern _Float64 f64sqrtf64x (_Float64x __x) noexcept (true);


extern _Float64 f64subf64x (_Float64x __x, _Float64x __y) noexcept (true);
# 788 "/usr/include/math.h" 2 3 4
# 854 "/usr/include/math.h" 3 4
extern int signgam;
# 934 "/usr/include/math.h" 3 4
/* Anonymous enumeration of five floating-point category codes, numbered
   0 through 4.  NOTE(review): these are presumably the values reported
   by the fpclassify facility -- confirm against the C library manual.
   The blank line after each `=` is left over from preprocessing.  */
enum
  {
    FP_NAN =

      0,    /* category code 0 */
    FP_INFINITE =

      1,    /* category code 1 */
    FP_ZERO =

      2,    /* category code 2 */
    FP_SUBNORMAL =

      3,    /* category code 3 */
    FP_NORMAL =

      4     /* category code 4 */
  };
# 1054 "/usr/include/math.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/iscanonical.h" 1 3 4
# 23 "/usr/include/x86_64-linux-gnu/bits/iscanonical.h" 3 4
/* Out-of-line helper used by the long double overload below.  */
extern int __iscanonicall (long double __x)
     noexcept (true) __attribute__ ((__const__));
# 46 "/usr/include/x86_64-linux-gnu/bits/iscanonical.h" 3 4
extern "C++" {

/* iscanonical overloads for float, double and long double.
   The float and double versions evaluate their argument, discard it,
   and always yield 1; the long double version forwards to
   __iscanonicall and returns its result.  */
inline int
iscanonical (float __val)
{
  (void) __val;
  return 1;
}

inline int
iscanonical (double __val)
{
  (void) __val;
  return 1;
}

inline int
iscanonical (long double __val)
{
  return __iscanonicall (__val);
}

}
# 1055 "/usr/include/math.h" 2 3 4
# 1066 "/usr/include/math.h" 3 4
extern "C++" {

/* issignaling overloads: each one forwards its argument to the
   width-specific helper (__issignalingf, __issignaling or
   __issignalingl) and returns that helper's int result unchanged.  */
inline int
issignaling (float __val)
{
  return __issignalingf (__val);
}

inline int
issignaling (double __val)
{
  return __issignaling (__val);
}

inline int
issignaling (long double __val)
{
  return __issignalingl (__val);
}

}
# 1097 "/usr/include/math.h" 3 4
extern "C++" {
# 1128 "/usr/include/math.h" 3 4
/* Generic zero test: yields the result of comparing the argument with
   the integer literal 0 using the type's own operator==.  */
template <typename __T>
inline bool iszero (__T __val)
{
  return __val == 0;
}

}
# 1363 "/usr/include/math.h" 3 4
extern "C++" {

/* Dispatch helper for iseqsig.  The primary template is only declared;
   each explicit specialization below supplies a static __call that
   forwards to the comparison routine for that floating type.  */
template<typename> struct __iseqsig_type;

/* float: forwards to __iseqsigf.  */
template<>
struct __iseqsig_type<float>
{
  static int
  __call (float __x, float __y) throw ()
  { return __iseqsigf (__x, __y); }
};

/* double: forwards to __iseqsig.  */
template<>
struct __iseqsig_type<double>
{
  static int
  __call (double __x, double __y) throw ()
  { return __iseqsig (__x, __y); }
};

/* long double: forwards to __iseqsigl.  */
template<>
struct __iseqsig_type<long double>
{
  static int
  __call (long double __x, long double __y) throw ()
  { return __iseqsigl (__x, __y); }
};
# 1406 "/usr/include/math.h" 3 4
/* Generic entry point.  The specialization is selected by the type of
   the expression (__x + __y + 0.0f); both arguments are then passed to
   that specialization's __call, converting as the call requires.  */
template<typename _T1, typename _T2>
inline int
iseqsig (_T1 __x, _T2 __y) throw ()
{
  using __sum_type = decltype (((__x) + (__y) + 0.0f));
  return __iseqsig_type<__sum_type>::__call (__x, __y);
}

}


}
# 46 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 2 3

# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/std_abs.h" 1 3
# 34 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/std_abs.h" 3


# 1 "/usr/include/stdlib.h" 1 3 4
# 26 "/usr/include/stdlib.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/libc-header-start.h" 1 3 4
# 27 "/usr/include/stdlib.h" 2 3 4


# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stddef.h" 1 3 4
# 46 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stddef.h" 3 4
typedef long unsigned int size_t;
# 33 "/usr/include/stdlib.h" 2 3 4

extern "C" {


# 1 "/usr/include/x86_64-linux-gnu/bits/waitflags.h" 1 3 4
# 41 "/usr/include/stdlib.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/waitstatus.h" 1 3 4
# 42 "/usr/include/stdlib.h" 2 3 4
# 59 "/usr/include/stdlib.h" 3 4
/* Pair of int members named quot and rem.  In the C standard library
   this is the result type of div().  Member order is part of the ABI.  */
typedef struct
  {
    int quot;    /* quotient */
    int rem;     /* remainder */
  } div_t;


/* long int counterpart of div_t: quot and rem members of type long int.
   In the C standard library this is the result type of ldiv().  */
typedef struct
  {
    long int quot;    /* quotient */
    long int rem;     /* remainder */
  } ldiv_t;


/* long long int counterpart of div_t.  The __extension__ marker
   suppresses pedantic diagnostics about the long long type.  In the
   C standard library this is the result type of lldiv().  */
__extension__ typedef struct
  {
    long long int quot;    /* quotient */
    long long int rem;     /* remainder */
  } lldiv_t;
# 98 "/usr/include/stdlib.h" 3 4
extern size_t __ctype_get_mb_cur_max (void) noexcept (true) ;


extern double atof (const char *__nptr)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1))) ;

extern int atoi (const char *__nptr)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1))) ;

extern long int atol (const char *__nptr)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1))) ;


__extension__ extern long long int atoll (const char *__nptr)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1))) ;


extern double strtod (const char *__restrict __nptr,
        char **__restrict __endptr)
     noexcept (true) __attribute__ ((__nonnull__ (1)));


extern float strtof (const char *__restrict __nptr,
       char **__restrict __endptr) noexcept (true) __attribute__ ((__nonnull__ (1)));

extern long double strtold (const char *__restrict __nptr,
       char **__restrict __endptr)
     noexcept (true) __attribute__ ((__nonnull__ (1)));
# 141 "/usr/include/stdlib.h" 3 4
extern _Float32 strtof32 (const char *__restrict __nptr,
     char **__restrict __endptr)
     noexcept (true) __attribute__ ((__nonnull__ (1)));


extern _Float64 strtof64 (const char *__restrict __nptr,
     char **__restrict __endptr)
     noexcept (true) __attribute__ ((__nonnull__ (1)));
# 159 "/usr/include/stdlib.h" 3 4
extern _Float32x strtof32x (const char *__restrict __nptr,
       char **__restrict __endptr)
     noexcept (true) __attribute__ ((__nonnull__ (1)));


extern _Float64x strtof64x (const char *__restrict __nptr,
       char **__restrict __endptr)
     noexcept (true) __attribute__ ((__nonnull__ (1)));
# 177 "/usr/include/stdlib.h" 3 4
extern long int strtol (const char *__restrict __nptr,
   char **__restrict __endptr, int __base)
     noexcept (true) __attribute__ ((__nonnull__ (1)));

extern unsigned long int strtoul (const char *__restrict __nptr,
      char **__restrict __endptr, int __base)
     noexcept (true) __attribute__ ((__nonnull__ (1)));


__extension__
extern long long int strtoq (const char *__restrict __nptr,
        char **__restrict __endptr, int __base)
     noexcept (true) __attribute__ ((__nonnull__ (1)));

__extension__
extern unsigned long long int strtouq (const char *__restrict __nptr,
           char **__restrict __endptr, int __base)
     noexcept (true) __attribute__ ((__nonnull__ (1)));


__extension__
extern long long int strtoll (const char *__restrict __nptr,
         char **__restrict __endptr, int __base)
     noexcept (true) __attribute__ ((__nonnull__ (1)));

__extension__
extern unsigned long long int strtoull (const char *__restrict __nptr,
     char **__restrict __endptr, int __base)
     noexcept (true) __attribute__ ((__nonnull__ (1)));


extern int strfromd (char *__dest, size_t __size, const char *__format,
       double __f)
     noexcept (true) __attribute__ ((__nonnull__ (3)));

extern int strfromf (char *__dest, size_t __size, const char *__format,
       float __f)
     noexcept (true) __attribute__ ((__nonnull__ (3)));

extern int strfroml (char *__dest, size_t __size, const char *__format,
       long double __f)
     noexcept (true) __attribute__ ((__nonnull__ (3)));
# 233 "/usr/include/stdlib.h" 3 4
extern int strfromf32 (char *__dest, size_t __size, const char * __format,
         _Float32 __f)
     noexcept (true) __attribute__ ((__nonnull__ (3)));


extern int strfromf64 (char *__dest, size_t __size, const char * __format,
         _Float64 __f)
     noexcept (true) __attribute__ ((__nonnull__ (3)));
# 251 "/usr/include/stdlib.h" 3 4
extern int strfromf32x (char *__dest, size_t __size, const char * __format,
   _Float32x __f)
     noexcept (true) __attribute__ ((__nonnull__ (3)));


extern int strfromf64x (char *__dest, size_t __size, const char * __format,
   _Float64x __f)
     noexcept (true) __attribute__ ((__nonnull__ (3)));
# 273 "/usr/include/stdlib.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/types/locale_t.h" 1 3 4
# 22 "/usr/include/x86_64-linux-gnu/bits/types/locale_t.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/types/__locale_t.h" 1 3 4
# 27 "/usr/include/x86_64-linux-gnu/bits/types/__locale_t.h" 3 4
/* Object behind the __locale_t / locale_t handle types.  It holds
   13 data pointers, three character-table pointers and 13 name strings.
   NOTE(review): 13 presumably matches the number of locale categories
   -- confirm against the C library sources.  */
struct __locale_struct
{

  /* One pointer to opaque locale data per slot.  */
  struct __locale_data *__locales[13];


  /* Character tables, judging by the names: a classification table of
     unsigned short and two int tables for case conversion.  */
  const unsigned short int *__ctype_b;
  const int *__ctype_tolower;
  const int *__ctype_toupper;


  /* One C string per slot, parallel to __locales.  */
  const char *__names[13];
};

typedef struct __locale_struct *__locale_t;
# 23 "/usr/include/x86_64-linux-gnu/bits/types/locale_t.h" 2 3 4

typedef __locale_t locale_t;
# 274 "/usr/include/stdlib.h" 2 3 4

extern long int strtol_l (const char *__restrict __nptr,
     char **__restrict __endptr, int __base,
     locale_t __loc) noexcept (true) __attribute__ ((__nonnull__ (1, 4)));

extern unsigned long int strtoul_l (const char *__restrict __nptr,
        char **__restrict __endptr,
        int __base, locale_t __loc)
     noexcept (true) __attribute__ ((__nonnull__ (1, 4)));

__extension__
extern long long int strtoll_l (const char *__restrict __nptr,
    char **__restrict __endptr, int __base,
    locale_t __loc)
     noexcept (true) __attribute__ ((__nonnull__ (1, 4)));

__extension__
extern unsigned long long int strtoull_l (const char *__restrict __nptr,
       char **__restrict __endptr,
       int __base, locale_t __loc)
     noexcept (true) __attribute__ ((__nonnull__ (1, 4)));

extern double strtod_l (const char *__restrict __nptr,
   char **__restrict __endptr, locale_t __loc)
     noexcept (true) __attribute__ ((__nonnull__ (1, 3)));

extern float strtof_l (const char *__restrict __nptr,
         char **__restrict __endptr, locale_t __loc)
     noexcept (true) __attribute__ ((__nonnull__ (1, 3)));

extern long double strtold_l (const char *__restrict __nptr,
         char **__restrict __endptr,
         locale_t __loc)
     noexcept (true) __attribute__ ((__nonnull__ (1, 3)));
# 317 "/usr/include/stdlib.h" 3 4
extern _Float32 strtof32_l (const char *__restrict __nptr,
       char **__restrict __endptr,
       locale_t __loc)
     noexcept (true) __attribute__ ((__nonnull__ (1, 3)));


extern _Float64 strtof64_l (const char *__restrict __nptr,
       char **__restrict __endptr,
       locale_t __loc)
     noexcept (true) __attribute__ ((__nonnull__ (1, 3)));
# 338 "/usr/include/stdlib.h" 3 4
extern _Float32x strtof32x_l (const char *__restrict __nptr,
         char **__restrict __endptr,
         locale_t __loc)
     noexcept (true) __attribute__ ((__nonnull__ (1, 3)));


extern _Float64x strtof64x_l (const char *__restrict __nptr,
         char **__restrict __endptr,
         locale_t __loc)
     noexcept (true) __attribute__ ((__nonnull__ (1, 3)));
# 386 "/usr/include/stdlib.h" 3 4
extern char *l64a (long int __n) noexcept (true) ;


extern long int a64l (const char *__s)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1))) ;


# 1 "/usr/include/x86_64-linux-gnu/sys/types.h" 1 3 4
# 27 "/usr/include/x86_64-linux-gnu/sys/types.h" 3 4
extern "C" {


typedef __u_char u_char;
typedef __u_short u_short;
typedef __u_int u_int;
typedef __u_long u_long;
typedef __quad_t quad_t;
typedef __u_quad_t u_quad_t;
typedef __fsid_t fsid_t;


typedef __loff_t loff_t;


typedef __ino_t ino_t;


typedef __ino64_t ino64_t;


typedef __dev_t dev_t;


typedef __gid_t gid_t;


typedef __mode_t mode_t;


typedef __nlink_t nlink_t;


typedef __uid_t uid_t;


typedef __off_t off_t;


typedef __off64_t off64_t;


typedef __pid_t pid_t;


typedef __id_t id_t;


typedef __ssize_t ssize_t;


typedef __daddr_t daddr_t;
typedef __caddr_t caddr_t;


typedef __key_t key_t;


# 1 "/usr/include/x86_64-linux-gnu/bits/types/clock_t.h" 1 3 4


typedef __clock_t clock_t;
# 127 "/usr/include/x86_64-linux-gnu/sys/types.h" 2 3 4

# 1 "/usr/include/x86_64-linux-gnu/bits/types/clockid_t.h" 1 3 4


typedef __clockid_t clockid_t;
# 129 "/usr/include/x86_64-linux-gnu/sys/types.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/types/time_t.h" 1 3 4
# 10 "/usr/include/x86_64-linux-gnu/bits/types/time_t.h" 3 4
typedef __time_t time_t;
# 130 "/usr/include/x86_64-linux-gnu/sys/types.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/types/timer_t.h" 1 3 4


typedef __timer_t timer_t;
# 131 "/usr/include/x86_64-linux-gnu/sys/types.h" 2 3 4


typedef __useconds_t useconds_t;


typedef __suseconds_t suseconds_t;


# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stddef.h" 1 3 4
# 145 "/usr/include/x86_64-linux-gnu/sys/types.h" 2 3 4


typedef unsigned long int ulong;
typedef unsigned short int ushort;
typedef unsigned int uint;


# 1 "/usr/include/x86_64-linux-gnu/bits/stdint-intn.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/stdint-intn.h" 3 4
typedef __int8_t int8_t;
typedef __int16_t int16_t;
typedef __int32_t int32_t;
typedef __int64_t int64_t;
# 156 "/usr/include/x86_64-linux-gnu/sys/types.h" 2 3 4


typedef __uint8_t u_int8_t;
typedef __uint16_t u_int16_t;
typedef __uint32_t u_int32_t;
typedef __uint64_t u_int64_t;


typedef int register_t __attribute__ ((__mode__ (__word__)));
# 176 "/usr/include/x86_64-linux-gnu/sys/types.h" 3 4
# 1 "/usr/include/endian.h" 1 3 4
# 24 "/usr/include/endian.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/endian.h" 1 3 4
# 35 "/usr/include/x86_64-linux-gnu/bits/endian.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/endianness.h" 1 3 4
# 36 "/usr/include/x86_64-linux-gnu/bits/endian.h" 2 3 4
# 25 "/usr/include/endian.h" 2 3 4
# 35 "/usr/include/endian.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/byteswap.h" 1 3 4
# 33 "/usr/include/x86_64-linux-gnu/bits/byteswap.h" 3 4
/* Exchange the two bytes of a 16-bit value and return the result.  */
static __inline __uint16_t
__bswap_16 (__uint16_t __bsx)
{
  /* The operand is promoted to int, so each half is isolated with a
     mask before the two halves are recombined.  */
  const int __upper_to_low = (__bsx >> 8) & 0xff;
  const int __lower_to_high = (__bsx & 0xff) << 8;
  return (__uint16_t) (__upper_to_low | __lower_to_high);
}


/* Reverse the order of the four bytes of a 32-bit value.  */
static __inline __uint32_t
__bswap_32 (__uint32_t __bsx)
{
  /* Each source byte is masked out and shifted to its mirrored slot.  */
  const __uint32_t __byte3 = (__bsx & 0xff000000u) >> 24;
  const __uint32_t __byte2 = (__bsx & 0x00ff0000u) >> 8;
  const __uint32_t __byte1 = (__bsx & 0x0000ff00u) << 8;
  const __uint32_t __byte0 = (__bsx & 0x000000ffu) << 24;
  return __byte3 | __byte2 | __byte1 | __byte0;
}
# 69 "/usr/include/x86_64-linux-gnu/bits/byteswap.h" 3 4
/* Reverse the order of the eight bytes of a 64-bit value.  */
__extension__ static __inline __uint64_t
__bswap_64 (__uint64_t __bsx)
{
  /* Bytes 7..4 of the input land in bytes 0..3 of the result.  */
  const __uint64_t __moved_down =
      ((__bsx & 0xff00000000000000ull) >> 56)
    | ((__bsx & 0x00ff000000000000ull) >> 40)
    | ((__bsx & 0x0000ff0000000000ull) >> 24)
    | ((__bsx & 0x000000ff00000000ull) >> 8);
  /* Bytes 3..0 of the input land in bytes 4..7 of the result.  */
  const __uint64_t __moved_up =
      ((__bsx & 0x00000000ff000000ull) << 8)
    | ((__bsx & 0x0000000000ff0000ull) << 24)
    | ((__bsx & 0x000000000000ff00ull) << 40)
    | ((__bsx & 0x00000000000000ffull) << 56);
  return __moved_down | __moved_up;
}
# 36 "/usr/include/endian.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/uintn-identity.h" 1 3 4
# 32 "/usr/include/x86_64-linux-gnu/bits/uintn-identity.h" 3 4
/* Identity on a 16-bit unsigned value: hands the argument straight back.  */
static __inline __uint16_t __uint16_identity (__uint16_t __x) { return __x; }

/* Identity on a 32-bit unsigned value: hands the argument straight back.  */
static __inline __uint32_t __uint32_identity (__uint32_t __x) { return __x; }

/* Identity on a 64-bit unsigned value: hands the argument straight back.  */
static __inline __uint64_t __uint64_identity (__uint64_t __x) { return __x; }
# 37 "/usr/include/endian.h" 2 3 4
# 177 "/usr/include/x86_64-linux-gnu/sys/types.h" 2 3 4


# 1 "/usr/include/x86_64-linux-gnu/sys/select.h" 1 3 4
# 30 "/usr/include/x86_64-linux-gnu/sys/select.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/select.h" 1 3 4
# 31 "/usr/include/x86_64-linux-gnu/sys/select.h" 2 3 4


# 1 "/usr/include/x86_64-linux-gnu/bits/types/sigset_t.h" 1 3 4


# 1 "/usr/include/x86_64-linux-gnu/bits/types/__sigset_t.h" 1 3 4


/* Fixed-size array of unsigned long words.  The element count is
   1024 / (8 * sizeof (unsigned long int)), i.e. 1024 bits in total
   when a byte has 8 bits.  NOTE(review): presumably one bit per signal
   number -- confirm against the signal-set accessors.  */
typedef struct
{
  unsigned long int __val[(1024 / (8 * sizeof (unsigned long int)))];
} __sigset_t;
# 5 "/usr/include/x86_64-linux-gnu/bits/types/sigset_t.h" 2 3 4


typedef __sigset_t sigset_t;
# 34 "/usr/include/x86_64-linux-gnu/sys/select.h" 2 3 4


# 1 "/usr/include/x86_64-linux-gnu/bits/types/struct_timeval.h" 1 3 4


/* Two-member time value.  Per POSIX, tv_sec counts whole seconds and
   tv_usec holds the additional microseconds.  */
struct timeval
{


  __time_t tv_sec;          /* seconds */
  __suseconds_t tv_usec;    /* microseconds */

};
# 38 "/usr/include/x86_64-linux-gnu/sys/select.h" 2 3 4

# 1 "/usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h" 1 3 4
# 11 "/usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h" 3 4
/* Two-member time value.  Per POSIX, tv_sec counts whole seconds and
   tv_nsec holds the additional nanoseconds.  The linemarker before the
   closing brace records source lines the preprocessor dropped.  */
struct timespec
{


  __time_t tv_sec;    /* seconds */


  __syscall_slong_t tv_nsec;    /* nanoseconds */
# 31 "/usr/include/x86_64-linux-gnu/bits/types/struct_timespec.h" 3 4
};
# 40 "/usr/include/x86_64-linux-gnu/sys/select.h" 2 3 4
# 49 "/usr/include/x86_64-linux-gnu/sys/select.h" 3 4
typedef long int __fd_mask;
# 59 "/usr/include/x86_64-linux-gnu/sys/select.h" 3 4
/* Descriptor set passed to select() and pselect().  It is stored as an
   array of __fd_mask words holding 1024 bits in total:
   1024 / (8 * sizeof (__fd_mask)) elements.
   NOTE(review): presumably one bit per file descriptor -- confirm
   against the FD_SET / FD_ISSET macros, which are not in this output.  */
typedef struct
  {


    __fd_mask fds_bits[1024 / (8 * (int) sizeof (__fd_mask))];


  } fd_set;


typedef __fd_mask fd_mask;
# 91 "/usr/include/x86_64-linux-gnu/sys/select.h" 3 4
extern "C" {
# 102 "/usr/include/x86_64-linux-gnu/sys/select.h" 3 4
extern int select (int __nfds, fd_set *__restrict __readfds,
     fd_set *__restrict __writefds,
     fd_set *__restrict __exceptfds,
     struct timeval *__restrict __timeout);
# 127 "/usr/include/x86_64-linux-gnu/sys/select.h" 3 4
extern int pselect (int __nfds, fd_set *__restrict __readfds,
      fd_set *__restrict __writefds,
      fd_set *__restrict __exceptfds,
      const struct timespec *__restrict __timeout,
      const __sigset_t *__restrict __sigmask);
# 153 "/usr/include/x86_64-linux-gnu/sys/select.h" 3 4
}
# 180 "/usr/include/x86_64-linux-gnu/sys/types.h" 2 3 4


typedef __blksize_t blksize_t;


typedef __blkcnt_t blkcnt_t;


typedef __fsblkcnt_t fsblkcnt_t;


typedef __fsfilcnt_t fsfilcnt_t;
# 219 "/usr/include/x86_64-linux-gnu/sys/types.h" 3 4
typedef __blkcnt64_t blkcnt64_t;
typedef __fsblkcnt64_t fsblkcnt64_t;
typedef __fsfilcnt64_t fsfilcnt64_t;


# 1 "/usr/include/x86_64-linux-gnu/bits/pthreadtypes.h" 1 3 4
# 23 "/usr/include/x86_64-linux-gnu/bits/pthreadtypes.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/thread-shared-types.h" 1 3 4
# 44 "/usr/include/x86_64-linux-gnu/bits/thread-shared-types.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h" 1 3 4
# 21 "/usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 1 3 4
# 22 "/usr/include/x86_64-linux-gnu/bits/pthreadtypes-arch.h" 2 3 4
# 45 "/usr/include/x86_64-linux-gnu/bits/thread-shared-types.h" 2 3 4

# 1 "/usr/include/x86_64-linux-gnu/bits/atomic_wide_counter.h" 1 3 4
# 25 "/usr/include/x86_64-linux-gnu/bits/atomic_wide_counter.h" 3 4
/* Union that overlays one unsigned long long with a pair of unsigned
   int halves, so the same storage can be read as a single wide value or
   as two narrower ones.  Which half is more significant depends on the
   target's byte order; the names suggest low then high.  */
typedef union
{
  __extension__ unsigned long long int __value64;    /* whole-value view */
  struct
  {
    unsigned int __low;     /* first half in memory */
    unsigned int __high;    /* second half in memory */
  } __value32;
} __atomic_wide_counter;
# 47 "/usr/include/x86_64-linux-gnu/bits/thread-shared-types.h" 2 3 4


/* Doubly linked list node: a pointer to the previous node and a pointer
   to the next node of the same type.  Embedded in __pthread_mutex_s as
   its __list member.  */
typedef struct __pthread_internal_list
{
  struct __pthread_internal_list *__prev;
  struct __pthread_internal_list *__next;
} __pthread_list_t;

/* Singly linked list node: holds only a pointer to the next node.  */
typedef struct __pthread_internal_slist
{
  struct __pthread_internal_slist *__next;
} __pthread_slist_t;
# 76 "/usr/include/x86_64-linux-gnu/bits/thread-shared-types.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/struct_mutex.h" 1 3 4
# 22 "/usr/include/x86_64-linux-gnu/bits/struct_mutex.h" 3 4
/* Internal mutex representation, overlaid by pthread_mutex_t through
   its __data member.  Member order and types are part of the binary
   layout.  NOTE(review): the members are private to the C library and
   their meanings are not visible in this output -- do not rely on them.
   The linemarker before the closing brace records source lines the
   preprocessor dropped for this target.  */
struct __pthread_mutex_s
{
  int __lock;
  unsigned int __count;
  int __owner;

  unsigned int __nusers;


  int __kind;

  short __spins;
  short __elision;
  __pthread_list_t __list;    /* doubly linked list node */
# 53 "/usr/include/x86_64-linux-gnu/bits/struct_mutex.h" 3 4
};
# 77 "/usr/include/x86_64-linux-gnu/bits/thread-shared-types.h" 2 3 4
# 89 "/usr/include/x86_64-linux-gnu/bits/thread-shared-types.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/struct_rwlock.h" 1 3 4
# 23 "/usr/include/x86_64-linux-gnu/bits/struct_rwlock.h" 3 4
/* Internal read-write lock representation, overlaid by pthread_rwlock_t
   through its __data member.  Member order and types are part of the
   binary layout.  NOTE(review): the members are private to the C
   library; the __pad* members look like padding/reserved space --
   confirm against the C library sources.  */
struct __pthread_rwlock_arch_t
{
  unsigned int __readers;
  unsigned int __writers;
  unsigned int __wrphase_futex;
  unsigned int __writers_futex;
  unsigned int __pad3;
  unsigned int __pad4;

  int __cur_writer;
  int __shared;
  signed char __rwelision;


  unsigned char __pad1[7];


  unsigned long int __pad2;


  unsigned int __flags;
# 55 "/usr/include/x86_64-linux-gnu/bits/struct_rwlock.h" 3 4
};
# 90 "/usr/include/x86_64-linux-gnu/bits/thread-shared-types.h" 2 3 4


/* Internal condition-variable representation, overlaid by
   pthread_cond_t through its __data member.  It holds two wide counters
   followed by unsigned int fields, several of them two-element arrays.
   NOTE(review): member meanings are private to the C library and are
   not visible in this output.  */
struct __pthread_cond_s
{
  __atomic_wide_counter __wseq;
  __atomic_wide_counter __g1_start;
  unsigned int __g_refs[2] ;
  unsigned int __g_size[2];
  unsigned int __g1_orig_size;
  unsigned int __wrefs;
  unsigned int __g_signals[2];
};

typedef unsigned int __tss_t;
typedef unsigned long int __thrd_t;

/* Wrapper around a single int.  NOTE(review): presumably the state
   word of a once-only initialization flag, judging by the name --
   confirm.  */
typedef struct
{
  int __data ;
} __once_flag;
# 24 "/usr/include/x86_64-linux-gnu/bits/pthreadtypes.h" 2 3 4


typedef unsigned long int pthread_t;


/* Opaque mutex-attribute object: 4 bytes of storage overlaid with an
   int, which gives the union at least int alignment.  */
typedef union
{
  char __size[4];
  int __align;
} pthread_mutexattr_t;


/* Opaque condition-variable-attribute object: 4 bytes of storage
   overlaid with an int, which gives the union at least int alignment.  */
typedef union
{
  char __size[4];
  int __align;
} pthread_condattr_t;


typedef unsigned int pthread_key_t;


typedef int pthread_once_t;


/* Opaque thread-attribute object: 56 bytes of storage overlaid with a
   long int, which gives the union at least long alignment.  The union
   has a tag, and the typedef below makes the bare name usable too.  */
union pthread_attr_t
{
  char __size[56];
  long int __align;
};

typedef union pthread_attr_t pthread_attr_t;


/* Public mutex type: the internal __pthread_mutex_s overlaid with a
   40-byte buffer and a long int.  The buffer and the long give the
   union a minimum size and alignment independent of the internal
   struct.  */
typedef union
{
  struct __pthread_mutex_s __data;
  char __size[40];
  long int __align;
} pthread_mutex_t;


/* Public condition-variable type: the internal __pthread_cond_s
   overlaid with a 48-byte buffer and a long long int, which gives the
   union at least long long alignment.  */
typedef union
{
  struct __pthread_cond_s __data;
  char __size[48];
  __extension__ long long int __align;
} pthread_cond_t;


/* Public read-write lock type: the internal __pthread_rwlock_arch_t
   overlaid with a 56-byte buffer and a long int, which gives the union
   at least long alignment.  */
typedef union
{
  struct __pthread_rwlock_arch_t __data;
  char __size[56];
  long int __align;
} pthread_rwlock_t;

/* Opaque read-write-lock-attribute object: 8 bytes of storage overlaid
   with a long int, which gives the union at least long alignment.  */
typedef union
{
  char __size[8];
  long int __align;
} pthread_rwlockattr_t;


typedef volatile int pthread_spinlock_t;


/* Opaque barrier object: 32 bytes of storage overlaid with a long int,
   which gives the union at least long alignment.  */
typedef union
{
  char __size[32];
  long int __align;
} pthread_barrier_t;

/* Opaque barrier-attribute object: 4 bytes of storage overlaid with an
   int, which gives the union at least int alignment.  */
typedef union
{
  char __size[4];
  int __align;
} pthread_barrierattr_t;
# 228 "/usr/include/x86_64-linux-gnu/sys/types.h" 2 3 4


}
# 396 "/usr/include/stdlib.h" 2 3 4


extern long int random (void) noexcept (true);


extern void srandom (unsigned int __seed) noexcept (true);


extern char *initstate (unsigned int __seed, char *__statebuf,
   size_t __statelen) noexcept (true) __attribute__ ((__nonnull__ (2)));


extern char *setstate (char *__statebuf) noexcept (true) __attribute__ ((__nonnull__ (1)));


/* Caller-owned generator state passed to random_r, srandom_r,
   initstate_r and setstate_r.  It holds four int32_t pointers and three
   int parameters.  NOTE(review): the pointers presumably address
   positions inside the caller-supplied state buffer -- confirm against
   the C library manual before touching the members directly.  */
struct random_data
  {
    int32_t *fptr;
    int32_t *rptr;
    int32_t *state;
    int rand_type;
    int rand_deg;
    int rand_sep;
    int32_t *end_ptr;
  };

extern int random_r (struct random_data *__restrict __buf,
       int32_t *__restrict __result) noexcept (true) __attribute__ ((__nonnull__ (1, 2)));

extern int srandom_r (unsigned int __seed, struct random_data *__buf)
     noexcept (true) __attribute__ ((__nonnull__ (2)));

extern int initstate_r (unsigned int __seed, char *__restrict __statebuf,
   size_t __statelen,
   struct random_data *__restrict __buf)
     noexcept (true) __attribute__ ((__nonnull__ (2, 4)));

extern int setstate_r (char *__restrict __statebuf,
         struct random_data *__restrict __buf)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern int rand (void) noexcept (true);

extern void srand (unsigned int __seed) noexcept (true);


extern int rand_r (unsigned int *__seed) noexcept (true);


extern double drand48 (void) noexcept (true);
extern double erand48 (unsigned short int __xsubi[3]) noexcept (true) __attribute__ ((__nonnull__ (1)));


extern long int lrand48 (void) noexcept (true);
extern long int nrand48 (unsigned short int __xsubi[3])
     noexcept (true) __attribute__ ((__nonnull__ (1)));


extern long int mrand48 (void) noexcept (true);
extern long int jrand48 (unsigned short int __xsubi[3])
     noexcept (true) __attribute__ ((__nonnull__ (1)));


extern void srand48 (long int __seedval) noexcept (true);
extern unsigned short int *seed48 (unsigned short int __seed16v[3])
     noexcept (true) __attribute__ ((__nonnull__ (1)));
extern void lcong48 (unsigned short int __param[7]) noexcept (true) __attribute__ ((__nonnull__ (1)));


// Caller-owned state buffer passed to the *48_r functions declared below.
// Field order and types are part of the C library ABI; do not reorder.
struct drand48_data
  {
    unsigned short int __x[3];      // NOTE(review): presumably the current 48-bit state - confirm
    unsigned short int __old_x[3];  // NOTE(review): presumably a saved previous state - confirm
    unsigned short int __c;         // NOTE(review): presumably the additive constant - confirm
    unsigned short int __init;      // NOTE(review): presumably an "initialized" flag - confirm
    // __extension__ silences pedantic warnings about `long long` in older language modes.
    __extension__ unsigned long long int __a;

  };


// Explicit-state (*_r) counterparts of the *rand48 prototypes. Every function takes a
// `struct drand48_data *` buffer, returns int, and is declared noexcept (true).
// Generators deliver their value through the trailing __result pointer.
// Observation: the three-pointer forms (erand48_r, nrand48_r, jrand48_r) list only
// arguments 1 and 2 in __nonnull__, so __result is not covered by the annotation.
extern int drand48_r (struct drand48_data *__restrict __buffer,
        double *__restrict __result) noexcept (true) __attribute__ ((__nonnull__ (1, 2)));
extern int erand48_r (unsigned short int __xsubi[3],
        struct drand48_data *__restrict __buffer,
        double *__restrict __result) noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern int lrand48_r (struct drand48_data *__restrict __buffer,
        long int *__restrict __result)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));
extern int nrand48_r (unsigned short int __xsubi[3],
        struct drand48_data *__restrict __buffer,
        long int *__restrict __result)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern int mrand48_r (struct drand48_data *__restrict __buffer,
        long int *__restrict __result)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));
extern int jrand48_r (unsigned short int __xsubi[3],
        struct drand48_data *__restrict __buffer,
        long int *__restrict __result)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


// Seeding/configuration counterparts; __buffer is annotated non-null in each.
extern int srand48_r (long int __seedval, struct drand48_data *__buffer)
     noexcept (true) __attribute__ ((__nonnull__ (2)));

extern int seed48_r (unsigned short int __seed16v[3],
       struct drand48_data *__buffer) noexcept (true) __attribute__ ((__nonnull__ (1, 2)));

extern int lcong48_r (unsigned short int __param[7],
        struct drand48_data *__buffer)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


// Host heap allocation prototypes. All are declared noexcept (true).
// __malloc__ marks the returned pointer as not aliasing any other live pointer;
// __warn_unused_result__ makes the compiler warn when the return value is dropped.
// The lone ';' lines are what remains after attribute macros expanded to nothing.
extern void *malloc (size_t __size) noexcept (true) __attribute__ ((__malloc__))
                                         ;

extern void *calloc (size_t __nmemb, size_t __size)
     noexcept (true) __attribute__ ((__malloc__)) ;


extern void *realloc (void *__ptr, size_t __size)
     noexcept (true) __attribute__ ((__warn_unused_result__)) ;


extern void free (void *__ptr) noexcept (true);


extern void *reallocarray (void *__ptr, size_t __nmemb, size_t __size)
     noexcept (true) __attribute__ ((__warn_unused_result__))

                       ;


// Second, attribute-free declaration of the same function; a redeclaration with an
// identical signature is legal. NOTE(review): likely the residue of a header macro that
// attaches extra attributes on other compilers and expands to nothing here - confirm.
extern void *reallocarray (void *__ptr, size_t __nmemb, size_t __size)
     noexcept (true) ;


# 1 "/usr/include/alloca.h" 1 3 4
# 24 "/usr/include/alloca.h" 3 4
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stddef.h" 1 3 4
# 25 "/usr/include/alloca.h" 2 3 4

// Contents of the included /usr/include/alloca.h: a single C-linkage prototype.
// NOTE(review): per alloca(3) the storage is taken from the caller's stack frame;
// that behavior is not visible in this declaration.
extern "C" {


extern void *alloca (size_t __size) noexcept (true);


}
# 575 "/usr/include/stdlib.h" 2 3 4


// Aligned-allocation prototypes. All are declared noexcept (true).
extern void *valloc (size_t __size) noexcept (true) __attribute__ ((__malloc__))
                                         ;


// Returns int and hands back the allocation through __memptr, which is annotated non-null.
extern int posix_memalign (void **__memptr, size_t __alignment, size_t __size)
     noexcept (true) __attribute__ ((__nonnull__ (1))) ;


// __alloc_align__ (1) tells the compiler that argument 1 gives the alignment of the
// returned pointer; __malloc__ marks the result as non-aliasing.
extern void *aligned_alloc (size_t __alignment, size_t __size)
     noexcept (true) __attribute__ ((__malloc__)) __attribute__ ((__alloc_align__ (1)))
                                         ;


// Process-termination prototypes and exit-handler registration.
// abort, exit, quick_exit and _Exit carry __noreturn__: control never comes back to the caller.
extern void abort (void) noexcept (true) __attribute__ ((__noreturn__));


// Registers a handler; the function pointer is annotated non-null.
extern int atexit (void (*__func) (void)) noexcept (true) __attribute__ ((__nonnull__ (1)));


// Declared with C++ language linkage but bound to the unmangled symbol "at_quick_exit"
// through the __asm label.
extern "C++" int at_quick_exit (void (*__func) (void))
     noexcept (true) __asm ("at_quick_exit") __attribute__ ((__nonnull__ (1)));
# 617 "/usr/include/stdlib.h" 3 4
extern int on_exit (void (*__func) (int __status, void *__arg), void *__arg)
     noexcept (true) __attribute__ ((__nonnull__ (1)));


extern void exit (int __status) noexcept (true) __attribute__ ((__noreturn__));


extern void quick_exit (int __status) noexcept (true) __attribute__ ((__noreturn__));


extern void _Exit (int __status) noexcept (true) __attribute__ ((__noreturn__));


// Environment access prototypes. All are declared noexcept (true).
extern char *getenv (const char *__name) noexcept (true) __attribute__ ((__nonnull__ (1))) ;


extern char *secure_getenv (const char *__name)
     noexcept (true) __attribute__ ((__nonnull__ (1))) ;


// Takes a non-const char*. NOTE(review): per putenv(3) the string itself becomes part
// of the environment - not visible from the declaration.
extern int putenv (char *__string) noexcept (true) __attribute__ ((__nonnull__ (1)));


// Only __value (argument 2) is annotated non-null; __name is not.
extern int setenv (const char *__name, const char *__value, int __replace)
     noexcept (true) __attribute__ ((__nonnull__ (2)));


extern int unsetenv (const char *__name) noexcept (true) __attribute__ ((__nonnull__ (1)));


extern int clearenv (void) noexcept (true);
// Temporary-name/file prototypes. Every __template parameter is a writable char* that is
// annotated non-null. mktemp and mkdtemp are declared noexcept (true); the mkstemp /
// mkostemp variants (and their 64-suffixed twins) are not. The `# N "file"` lines are
// preprocessor line markers and must stay directly above the declaration they describe.
# 682 "/usr/include/stdlib.h" 3 4
extern char *mktemp (char *__template) noexcept (true) __attribute__ ((__nonnull__ (1)));
# 695 "/usr/include/stdlib.h" 3 4
extern int mkstemp (char *__template) __attribute__ ((__nonnull__ (1))) ;
# 705 "/usr/include/stdlib.h" 3 4
extern int mkstemp64 (char *__template) __attribute__ ((__nonnull__ (1))) ;
# 717 "/usr/include/stdlib.h" 3 4
extern int mkstemps (char *__template, int __suffixlen) __attribute__ ((__nonnull__ (1))) ;
# 727 "/usr/include/stdlib.h" 3 4
extern int mkstemps64 (char *__template, int __suffixlen)
     __attribute__ ((__nonnull__ (1))) ;
# 738 "/usr/include/stdlib.h" 3 4
extern char *mkdtemp (char *__template) noexcept (true) __attribute__ ((__nonnull__ (1))) ;
# 749 "/usr/include/stdlib.h" 3 4
extern int mkostemp (char *__template, int __flags) __attribute__ ((__nonnull__ (1))) ;
# 759 "/usr/include/stdlib.h" 3 4
extern int mkostemp64 (char *__template, int __flags) __attribute__ ((__nonnull__ (1))) ;
# 769 "/usr/include/stdlib.h" 3 4
extern int mkostemps (char *__template, int __suffixlen, int __flags)
     __attribute__ ((__nonnull__ (1))) ;
# 781 "/usr/include/stdlib.h" 3 4
extern int mkostemps64 (char *__template, int __suffixlen, int __flags)
     __attribute__ ((__nonnull__ (1))) ;
// Command execution and path-resolution prototypes. `system` is not declared noexcept
// and carries no __nonnull__, so the declaration does not forbid a null __command.
// canonicalize_file_name is __malloc__-annotated (non-aliasing result).
// realpath takes two __restrict pointers and carries no __nonnull__ annotation.
# 791 "/usr/include/stdlib.h" 3 4
extern int system (const char *__command) ;


extern char *canonicalize_file_name (const char *__name)
     noexcept (true) __attribute__ ((__nonnull__ (1))) __attribute__ ((__malloc__))
                              ;
# 808 "/usr/include/stdlib.h" 3 4
extern char *realpath (const char *__restrict __name,
         char *__restrict __resolved) noexcept (true) ;


// Comparator callback types and the search/sort prototypes that use them.
// __compar_fn_t: int (const void *, const void *).
typedef int (*__compar_fn_t) (const void *, const void *);


// Public alias of the two-argument comparator type.
typedef __compar_fn_t comparison_fn_t;


// Three-argument comparator: the extra void* is the __arg forwarded by qsort_r.
typedef int (*__compar_d_fn_t) (const void *, const void *, void *);


// None of the three functions is declared noexcept, which fits the fact that they
// invoke a user-supplied callback. Key, base and comparator are annotated non-null.
extern void *bsearch (const void *__key, const void *__base,
        size_t __nmemb, size_t __size, __compar_fn_t __compar)
     __attribute__ ((__nonnull__ (1, 2, 5))) ;


extern void qsort (void *__base, size_t __nmemb, size_t __size,
     __compar_fn_t __compar) __attribute__ ((__nonnull__ (1, 4)));

// __arg (argument 5) is not in the __nonnull__ list.
extern void qsort_r (void *__base, size_t __nmemb, size_t __size,
       __compar_d_fn_t __compar, void *__arg)
  __attribute__ ((__nonnull__ (1, 4)));


// Integer absolute-value and division prototypes. Every function is noexcept (true)
// and __const__: the result depends only on the argument values, so the compiler may
// merge or hoist repeated calls. The int `abs` declared here is the one that
// namespace std later re-exports with `using ::abs;`.
extern int abs (int __x) noexcept (true) __attribute__ ((__const__)) ;
extern long int labs (long int __x) noexcept (true) __attribute__ ((__const__)) ;


// __extension__ suppresses pedantic diagnostics about `long long`.
__extension__ extern long long int llabs (long long int __x)
     noexcept (true) __attribute__ ((__const__)) ;


// div_t / ldiv_t / lldiv_t are declared earlier in the header (not in this span).
extern div_t div (int __numer, int __denom)
     noexcept (true) __attribute__ ((__const__)) ;
extern ldiv_t ldiv (long int __numer, long int __denom)
     noexcept (true) __attribute__ ((__const__)) ;


__extension__ extern lldiv_t lldiv (long long int __numer,
        long long int __denom)
     noexcept (true) __attribute__ ((__const__)) ;
// Floating-point-to-string conversion prototypes. Three groups are visible:
//   ecvt/fcvt/gcvt      - double input, return char*
//   qecvt/qfcvt/qgcvt   - long double input, return char*
//   *_r variants        - write into a caller buffer (__buf, __len) and return int
// The __decpt/__sign out-pointers (and __buf where present) are annotated non-null.
// NOTE(review): formatting rules and buffer ownership of the non-_r forms are not
// visible here - see ecvt(3).
# 880 "/usr/include/stdlib.h" 3 4
extern char *ecvt (double __value, int __ndigit, int *__restrict __decpt,
     int *__restrict __sign) noexcept (true) __attribute__ ((__nonnull__ (3, 4))) ;


extern char *fcvt (double __value, int __ndigit, int *__restrict __decpt,
     int *__restrict __sign) noexcept (true) __attribute__ ((__nonnull__ (3, 4))) ;


extern char *gcvt (double __value, int __ndigit, char *__buf)
     noexcept (true) __attribute__ ((__nonnull__ (3))) ;


extern char *qecvt (long double __value, int __ndigit,
      int *__restrict __decpt, int *__restrict __sign)
     noexcept (true) __attribute__ ((__nonnull__ (3, 4))) ;
extern char *qfcvt (long double __value, int __ndigit,
      int *__restrict __decpt, int *__restrict __sign)
     noexcept (true) __attribute__ ((__nonnull__ (3, 4))) ;
extern char *qgcvt (long double __value, int __ndigit, char *__buf)
     noexcept (true) __attribute__ ((__nonnull__ (3))) ;


extern int ecvt_r (double __value, int __ndigit, int *__restrict __decpt,
     int *__restrict __sign, char *__restrict __buf,
     size_t __len) noexcept (true) __attribute__ ((__nonnull__ (3, 4, 5)));
extern int fcvt_r (double __value, int __ndigit, int *__restrict __decpt,
     int *__restrict __sign, char *__restrict __buf,
     size_t __len) noexcept (true) __attribute__ ((__nonnull__ (3, 4, 5)));

extern int qecvt_r (long double __value, int __ndigit,
      int *__restrict __decpt, int *__restrict __sign,
      char *__restrict __buf, size_t __len)
     noexcept (true) __attribute__ ((__nonnull__ (3, 4, 5)));
extern int qfcvt_r (long double __value, int __ndigit,
      int *__restrict __decpt, int *__restrict __sign,
      char *__restrict __buf, size_t __len)
     noexcept (true) __attribute__ ((__nonnull__ (3, 4, 5)));


// Multibyte <-> wide-character conversion prototypes. All are noexcept (true) and none
// carries a __nonnull__ annotation, so the declarations do not forbid null pointers.
// NOTE(review): the meaning of a null argument is defined by the C standard, not here.
extern int mblen (const char *__s, size_t __n) noexcept (true);


extern int mbtowc (wchar_t *__restrict __pwc,
     const char *__restrict __s, size_t __n) noexcept (true);


extern int wctomb (char *__s, wchar_t __wchar) noexcept (true);


// String forms: return size_t and take a destination, a source and a size __n.
extern size_t mbstowcs (wchar_t *__restrict __pwcs,
   const char *__restrict __s, size_t __n) noexcept (true)
                                      ;

extern size_t wcstombs (char *__restrict __s,
   const wchar_t *__restrict __pwcs, size_t __n)
     noexcept (true)

                                    ;


// Miscellaneous <stdlib.h> prototypes: response matching, sub-option parsing,
// pseudo-terminal helpers and load-average query.
// posix_openpt and getpt are the only ones here not declared noexcept (true).
extern int rpmatch (const char *__response) noexcept (true) __attribute__ ((__nonnull__ (1))) ;
# 967 "/usr/include/stdlib.h" 3 4
extern int getsubopt (char **__restrict __optionp,
        char *const *__restrict __tokens,
        char **__restrict __valuep)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2, 3))) ;


extern int posix_openpt (int __oflag) ;


extern int grantpt (int __fd) noexcept (true);


extern int unlockpt (int __fd) noexcept (true);


extern char *ptsname (int __fd) noexcept (true) ;


// Buffer-taking form of ptsname; __buf is annotated non-null.
extern int ptsname_r (int __fd, char *__buf, size_t __buflen)
     noexcept (true) __attribute__ ((__nonnull__ (2))) ;


extern int getpt (void);


// __loadavg is an array parameter (decays to double*), annotated non-null;
// __nelem is passed alongside it.
extern int getloadavg (double __loadavg[], int __nelem)
     noexcept (true) __attribute__ ((__nonnull__ (1)));
# 1023 "/usr/include/stdlib.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/stdlib-float.h" 1 3 4
# 1024 "/usr/include/stdlib.h" 2 3 4
# 1035 "/usr/include/stdlib.h" 3 4
}
# 39 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/std_abs.h" 2 3


// std::abs overload set: the C library's int version is re-exported and the wider
// integer and floating-point versions are defined in terms of compiler builtins.
extern "C++" {
namespace std __attribute__ ((__visibility__ ("default"))) {

  // int abs(int) comes from the global declaration.
  using ::abs;

  // Wider integer overloads (not constexpr).
  inline long abs(long __i)
  {
    return __builtin_labs(__i);
  }

  inline long long abs(long long __x)
  {
    return __builtin_llabs (__x);
  }
# 70 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/std_abs.h" 3
  // Floating-point overloads, usable in constant expressions.
  inline constexpr double abs(double __x)
  {
    return __builtin_fabs(__x);
  }

  inline constexpr float abs(float __x)
  {
    return __builtin_fabsf(__x);
  }

  inline constexpr long double abs(long double __x)
  {
    return __builtin_fabsl(__x);
  }
# 108 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/bits/std_abs.h" 3
} // namespace std
} // extern "C++"
# 48 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 2 3
# 77 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 3
extern "C++"
{
namespace std __attribute__ ((__visibility__ ("default")))
{


  using ::acos;


  // acos overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float acos(float __x)
  {
    return __builtin_acosf(__x);
  }

  inline constexpr long double acos(long double __x)
  {
    return __builtin_acosl(__x);
  }

  // Enabled only when __is_integer<_Tp>::__value is true; the argument is converted
  // to double and the double builtin is used.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    acos(_Tp __x)
    {
      return __builtin_acos(static_cast<double>(__x));
    }

  using ::asin;


  // asin overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float asin(float __x)
  {
    return __builtin_asinf(__x);
  }

  inline constexpr long double asin(long double __x)
  {
    return __builtin_asinl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    asin(_Tp __x)
    {
      return __builtin_asin(static_cast<double>(__x));
    }

  using ::atan;


  // atan overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float atan(float __x)
  {
    return __builtin_atanf(__x);
  }

  inline constexpr long double atan(long double __x)
  {
    return __builtin_atanl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    atan(_Tp __x)
    {
      return __builtin_atan(static_cast<double>(__x));
    }

  using ::atan2;


  // atan2 overloads: same-type float / long double pairs go straight to the builtin.
  inline constexpr float atan2(float __y, float __x)
  {
    return __builtin_atan2f(__y, __x);
  }

  inline constexpr long double atan2(long double __y, long double __x)
  {
    return __builtin_atan2l(__y, __x);
  }

  // Mixed-type call: both arguments are converted to the __promote_2 result type and
  // the call is re-dispatched to the overload for that type.
  template<typename _Tp, typename _Up>
    inline constexpr
    typename __gnu_cxx::__promote_2<_Tp, _Up>::__type
    atan2(_Tp __y, _Up __x)
    {
      using __promoted = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return atan2(__promoted(__y), __promoted(__x));
    }

  using ::ceil;


  // ceil overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float ceil(float __x)
  {
    return __builtin_ceilf(__x);
  }

  inline constexpr long double ceil(long double __x)
  {
    return __builtin_ceill(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    ceil(_Tp __x)
    {
      return __builtin_ceil(static_cast<double>(__x));
    }

  using ::cos;


  // cos overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float cos(float __x)
  {
    return __builtin_cosf(__x);
  }

  inline constexpr long double cos(long double __x)
  {
    return __builtin_cosl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    cos(_Tp __x)
    {
      return __builtin_cos(static_cast<double>(__x));
    }

  using ::cosh;


  // cosh overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float cosh(float __x)
  {
    return __builtin_coshf(__x);
  }

  inline constexpr long double cosh(long double __x)
  {
    return __builtin_coshl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    cosh(_Tp __x)
    {
      return __builtin_cosh(static_cast<double>(__x));
    }

  using ::exp;


  // exp overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float exp(float __x)
  {
    return __builtin_expf(__x);
  }

  inline constexpr long double exp(long double __x)
  {
    return __builtin_expl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    exp(_Tp __x)
    {
      return __builtin_exp(static_cast<double>(__x));
    }

  using ::fabs;


  // fabs overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float fabs(float __x)
  {
    return __builtin_fabsf(__x);
  }

  inline constexpr long double fabs(long double __x)
  {
    return __builtin_fabsl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    fabs(_Tp __x)
    {
      return __builtin_fabs(static_cast<double>(__x));
    }

  using ::floor;


  // floor overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float floor(float __x)
  {
    return __builtin_floorf(__x);
  }

  inline constexpr long double floor(long double __x)
  {
    return __builtin_floorl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    floor(_Tp __x)
    {
      return __builtin_floor(static_cast<double>(__x));
    }

  using ::fmod;


  // fmod overloads: same-type float / long double pairs go straight to the builtin.
  inline constexpr float fmod(float __x, float __y)
  {
    return __builtin_fmodf(__x, __y);
  }

  inline constexpr long double fmod(long double __x, long double __y)
  {
    return __builtin_fmodl(__x, __y);
  }

  // Mixed-type call: convert both operands to the __promote_2 result type, then
  // re-dispatch to the overload for that type.
  template<typename _Tp, typename _Up>
    inline constexpr
    typename __gnu_cxx::__promote_2<_Tp, _Up>::__type
    fmod(_Tp __x, _Up __y)
    {
      using __promoted = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return fmod(__promoted(__x), __promoted(__y));
    }

  using ::frexp;


  // frexp overloads. The float / long double forms are not constexpr; the second
  // argument is a pointer handed through to the builtin unchanged.
  inline float frexp(float __x, int* __exp)
  {
    return __builtin_frexpf(__x, __exp);
  }

  inline long double frexp(long double __x, int* __exp)
  {
    return __builtin_frexpl(__x, __exp);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    frexp(_Tp __x, int* __exp)
    {
      return __builtin_frexp(static_cast<double>(__x), __exp);
    }

  using ::ldexp;


  // ldexp overloads: the int __exp argument is passed through to the builtin as-is.
  inline constexpr float ldexp(float __x, int __exp)
  {
    return __builtin_ldexpf(__x, __exp);
  }

  inline constexpr long double ldexp(long double __x, int __exp)
  {
    return __builtin_ldexpl(__x, __exp);
  }

  // Integer first arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    ldexp(_Tp __x, int __exp)
    {
      return __builtin_ldexp(static_cast<double>(__x), __exp);
    }

  using ::log;


  // log overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float log(float __x)
  {
    return __builtin_logf(__x);
  }

  inline constexpr long double log(long double __x)
  {
    return __builtin_logl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    log(_Tp __x)
    {
      return __builtin_log(static_cast<double>(__x));
    }

  using ::log10;


  // log10 overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float log10(float __x)
  {
    return __builtin_log10f(__x);
  }

  inline constexpr long double log10(long double __x)
  {
    return __builtin_log10l(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    log10(_Tp __x)
    {
      return __builtin_log10(static_cast<double>(__x));
    }

  using ::modf;


  // modf overloads (not constexpr). The builtin returns one value and stores another
  // through __iptr; both arguments are forwarded unchanged. No integer template exists.
  inline float modf(float __x, float* __iptr)
  {
    return __builtin_modff(__x, __iptr);
  }

  inline long double modf(long double __x, long double* __iptr)
  {
    return __builtin_modfl(__x, __iptr);
  }


  using ::pow;


  // pow overloads: same-type float / long double pairs go straight to the builtin.
  inline constexpr float pow(float __x, float __y)
  {
    return __builtin_powf(__x, __y);
  }

  inline constexpr long double pow(long double __x, long double __y)
  {
    return __builtin_powl(__x, __y);
  }
  // The template below handles mixed-type calls: both operands are converted to the
  // __promote_2 result type and the call is re-dispatched to that type's overload.
# 412 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 3
  template<typename _Tp, typename _Up>
    inline constexpr
    typename __gnu_cxx::__promote_2<_Tp, _Up>::__type
    pow(_Tp __x, _Up __y)
    {
      using __promoted = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return pow(__promoted(__x), __promoted(__y));
    }

  using ::sin;


  // sin overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float sin(float __x)
  {
    return __builtin_sinf(__x);
  }

  inline constexpr long double sin(long double __x)
  {
    return __builtin_sinl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    sin(_Tp __x)
    {
      return __builtin_sin(static_cast<double>(__x));
    }

  using ::sinh;


  // sinh overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float sinh(float __x)
  {
    return __builtin_sinhf(__x);
  }

  inline constexpr long double sinh(long double __x)
  {
    return __builtin_sinhl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    sinh(_Tp __x)
    {
      return __builtin_sinh(static_cast<double>(__x));
    }

  using ::sqrt;


  // sqrt overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float sqrt(float __x)
  {
    return __builtin_sqrtf(__x);
  }

  inline constexpr long double sqrt(long double __x)
  {
    return __builtin_sqrtl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    sqrt(_Tp __x)
    {
      return __builtin_sqrt(static_cast<double>(__x));
    }

  using ::tan;


  // tan overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float tan(float __x)
  {
    return __builtin_tanf(__x);
  }

  inline constexpr long double tan(long double __x)
  {
    return __builtin_tanl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    tan(_Tp __x)
    {
      return __builtin_tan(static_cast<double>(__x));
    }

  using ::tanh;


  // tanh overloads: float and long double forward to the matching-precision builtin.
  inline constexpr float tanh(float __x)
  {
    return __builtin_tanhf(__x);
  }

  inline constexpr long double tanh(long double __x)
  {
    return __builtin_tanhl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    inline constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    tanh(_Tp __x)
    {
      return __builtin_tanh(static_cast<double>(__x));
    }
# 536 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 3
  // fpclassify for the three floating-point types. The five leading integers are the
  // codes the builtin returns for, in GCC's documented argument order, NaN (0),
  // infinite (1), normal (4), subnormal (3) and zero (2).
  constexpr int fpclassify(float __x)
  {
    return __builtin_fpclassify(0, 1, 4, 3, 2, __x);
  }

  constexpr int fpclassify(double __x)
  {
    return __builtin_fpclassify(0, 1, 4, 3, 2, __x);
  }

  constexpr int fpclassify(long double __x)
  {
    return __builtin_fpclassify(0, 1, 4, 3, 2, __x);
  }


  // Integer overload (enabled via __is_integer): yields 2 for a zero argument and 4
  // for anything else - the same codes the floating-point overloads pass to the
  // builtin for "zero" and "normal".
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, int>::__type
    fpclassify(_Tp __x)
    {
      return __x == 0 ? 2 : 4;
    }


  // isfinite for float / double / long double: the builtin's int result is turned
  // into a bool.
  constexpr bool isfinite(float __x)
  {
    return __builtin_isfinite(__x) != 0;
  }

  constexpr bool isfinite(double __x)
  {
    return __builtin_isfinite(__x) != 0;
  }

  constexpr bool isfinite(long double __x)
  {
    return __builtin_isfinite(__x) != 0;
  }


  // Integer overload (enabled via __is_integer): always true; the argument is unused.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, bool>::__type
    isfinite(_Tp __x)
    {
      return true;
    }


  // isinf for float / double / long double: the builtin's int result is turned
  // into a bool.
  constexpr bool isinf(float __x)
  {
    return __builtin_isinf(__x) != 0;
  }

  constexpr bool isinf(double __x)
  {
    return __builtin_isinf(__x) != 0;
  }

  constexpr bool isinf(long double __x)
  {
    return __builtin_isinf(__x) != 0;
  }


  // Integer overload (enabled via __is_integer): always false; the argument is unused.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, bool>::__type
    isinf(_Tp __x)
    {
      return false;
    }


  // isnan for float / double / long double: the builtin's int result is turned
  // into a bool.
  constexpr bool isnan(float __x)
  {
    return __builtin_isnan(__x) != 0;
  }

  constexpr bool isnan(double __x)
  {
    return __builtin_isnan(__x) != 0;
  }

  constexpr bool isnan(long double __x)
  {
    return __builtin_isnan(__x) != 0;
  }


  // Integer overload (enabled via __is_integer): always false; the argument is unused.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, bool>::__type
    isnan(_Tp __x)
    {
      return false;
    }


  // isnormal for float / double / long double: the builtin's int result is turned
  // into a bool.
  constexpr bool isnormal(float __x)
  {
    return __builtin_isnormal(__x) != 0;
  }

  constexpr bool isnormal(double __x)
  {
    return __builtin_isnormal(__x) != 0;
  }

  constexpr bool isnormal(long double __x)
  {
    return __builtin_isnormal(__x) != 0;
  }


  // Integer overload (enabled via __is_integer): true exactly when the value is non-zero.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, bool>::__type
    isnormal(_Tp __x)
    {
      return __x != 0;
    }


  // signbit for float / double / long double: any non-zero builtin result maps to true.
  constexpr bool signbit(float __x)
  {
    return __builtin_signbit(__x) != 0;
  }

  constexpr bool signbit(double __x)
  {
    return __builtin_signbit(__x) != 0;
  }

  constexpr bool signbit(long double __x)
  {
    return __builtin_signbit(__x) != 0;
  }


  // Integer overload (enabled via __is_integer): true exactly when the value is below zero.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, bool>::__type
    signbit(_Tp __x)
    {
      return __x < 0;
    }


  // isgreater for same-type float / double / long double pairs; the builtin's int
  // result is turned into a bool.
  constexpr bool isgreater(float __x, float __y)
  {
    return __builtin_isgreater(__x, __y) != 0;
  }

  constexpr bool isgreater(double __x, double __y)
  {
    return __builtin_isgreater(__x, __y) != 0;
  }

  constexpr bool isgreater(long double __x, long double __y)
  {
    return __builtin_isgreater(__x, __y) != 0;
  }


  // Mixed-type overload, enabled when both parameter types satisfy __is_arithmetic:
  // both operands are converted to the __promote_2 result type before the comparison.
  template<typename _Tp, typename _Up>
    constexpr
    typename __gnu_cxx::__enable_if<(__is_arithmetic<_Tp>::__value
                                     && __is_arithmetic<_Up>::__value),
                                    bool>::__type
    isgreater(_Tp __x, _Up __y)
    {
      using __promoted = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return __builtin_isgreater(__promoted(__x), __promoted(__y));
    }


  // isgreaterequal for same-type float / double / long double pairs; the builtin's
  // int result is turned into a bool.
  constexpr bool isgreaterequal(float __x, float __y)
  {
    return __builtin_isgreaterequal(__x, __y) != 0;
  }

  constexpr bool isgreaterequal(double __x, double __y)
  {
    return __builtin_isgreaterequal(__x, __y) != 0;
  }

  constexpr bool isgreaterequal(long double __x, long double __y)
  {
    return __builtin_isgreaterequal(__x, __y) != 0;
  }


  // Mixed-type overload, enabled when both parameter types satisfy __is_arithmetic:
  // both operands are converted to the __promote_2 result type before the comparison.
  template<typename _Tp, typename _Up>
    constexpr
    typename __gnu_cxx::__enable_if<(__is_arithmetic<_Tp>::__value
                                     && __is_arithmetic<_Up>::__value),
                                    bool>::__type
    isgreaterequal(_Tp __x, _Up __y)
    {
      using __promoted = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return __builtin_isgreaterequal(__promoted(__x), __promoted(__y));
    }


  // isless for same-type float / double / long double pairs; the builtin's int
  // result is turned into a bool.
  constexpr bool isless(float __x, float __y)
  {
    return __builtin_isless(__x, __y) != 0;
  }

  constexpr bool isless(double __x, double __y)
  {
    return __builtin_isless(__x, __y) != 0;
  }

  constexpr bool isless(long double __x, long double __y)
  {
    return __builtin_isless(__x, __y) != 0;
  }


  // Mixed-type overload, enabled when both parameter types satisfy __is_arithmetic:
  // both operands are converted to the __promote_2 result type before the comparison.
  template<typename _Tp, typename _Up>
    constexpr
    typename __gnu_cxx::__enable_if<(__is_arithmetic<_Tp>::__value
                                     && __is_arithmetic<_Up>::__value),
                                    bool>::__type
    isless(_Tp __x, _Up __y)
    {
      using __promoted = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return __builtin_isless(__promoted(__x), __promoted(__y));
    }


  // islessequal for same-type float / double / long double pairs; the builtin's int
  // result is turned into a bool.
  constexpr bool islessequal(float __x, float __y)
  {
    return __builtin_islessequal(__x, __y) != 0;
  }

  constexpr bool islessequal(double __x, double __y)
  {
    return __builtin_islessequal(__x, __y) != 0;
  }

  constexpr bool islessequal(long double __x, long double __y)
  {
    return __builtin_islessequal(__x, __y) != 0;
  }


  // Mixed-type overload, enabled when both parameter types satisfy __is_arithmetic:
  // both operands are converted to the __promote_2 result type before the comparison.
  template<typename _Tp, typename _Up>
    constexpr
    typename __gnu_cxx::__enable_if<(__is_arithmetic<_Tp>::__value
                                     && __is_arithmetic<_Up>::__value),
                                    bool>::__type
    islessequal(_Tp __x, _Up __y)
    {
      using __promoted = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return __builtin_islessequal(__promoted(__x), __promoted(__y));
    }


  // islessgreater for same-type float / double / long double pairs; the builtin's
  // int result is turned into a bool.
  constexpr bool islessgreater(float __x, float __y)
  {
    return __builtin_islessgreater(__x, __y) != 0;
  }

  constexpr bool islessgreater(double __x, double __y)
  {
    return __builtin_islessgreater(__x, __y) != 0;
  }

  constexpr bool islessgreater(long double __x, long double __y)
  {
    return __builtin_islessgreater(__x, __y) != 0;
  }


  // Mixed-type overload, enabled when both parameter types satisfy __is_arithmetic:
  // both operands are converted to the __promote_2 result type before the comparison.
  template<typename _Tp, typename _Up>
    constexpr
    typename __gnu_cxx::__enable_if<(__is_arithmetic<_Tp>::__value
                                     && __is_arithmetic<_Up>::__value),
                                    bool>::__type
    islessgreater(_Tp __x, _Up __y)
    {
      using __promoted = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return __builtin_islessgreater(__promoted(__x), __promoted(__y));
    }


  // isunordered for same-type float / double / long double pairs; the builtin's int
  // result is turned into a bool.
  constexpr bool isunordered(float __x, float __y)
  {
    return __builtin_isunordered(__x, __y) != 0;
  }

  constexpr bool isunordered(double __x, double __y)
  {
    return __builtin_isunordered(__x, __y) != 0;
  }

  constexpr bool isunordered(long double __x, long double __y)
  {
    return __builtin_isunordered(__x, __y) != 0;
  }


  // Mixed-type overload, enabled when both parameter types satisfy __is_arithmetic:
  // both operands are converted to the __promote_2 result type before the check.
  template<typename _Tp, typename _Up>
    constexpr
    typename __gnu_cxx::__enable_if<(__is_arithmetic<_Tp>::__value
                                     && __is_arithmetic<_Up>::__value),
                                    bool>::__type
    isunordered(_Tp __x, _Up __y)
    {
      using __promoted = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return __builtin_isunordered(__promoted(__x), __promoted(__y));
    }
# 1065 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 3
  using ::double_t;
  using ::float_t;


  using ::acosh;
  using ::acoshf;
  using ::acoshl;

  using ::asinh;
  using ::asinhf;
  using ::asinhl;

  using ::atanh;
  using ::atanhf;
  using ::atanhl;

  using ::cbrt;
  using ::cbrtf;
  using ::cbrtl;

  using ::copysign;
  using ::copysignf;
  using ::copysignl;

  using ::erf;
  using ::erff;
  using ::erfl;

  using ::erfc;
  using ::erfcf;
  using ::erfcl;

  using ::exp2;
  using ::exp2f;
  using ::exp2l;

  using ::expm1;
  using ::expm1f;
  using ::expm1l;

  using ::fdim;
  using ::fdimf;
  using ::fdiml;

  using ::fma;
  using ::fmaf;
  using ::fmal;

  using ::fmax;
  using ::fmaxf;
  using ::fmaxl;

  using ::fmin;
  using ::fminf;
  using ::fminl;

  using ::hypot;
  using ::hypotf;
  using ::hypotl;

  using ::ilogb;
  using ::ilogbf;
  using ::ilogbl;

  using ::lgamma;
  using ::lgammaf;
  using ::lgammal;


  using ::llrint;
  using ::llrintf;
  using ::llrintl;

  using ::llround;
  using ::llroundf;
  using ::llroundl;


  using ::log1p;
  using ::log1pf;
  using ::log1pl;

  using ::log2;
  using ::log2f;
  using ::log2l;

  using ::logb;
  using ::logbf;
  using ::logbl;

  using ::lrint;
  using ::lrintf;
  using ::lrintl;

  using ::lround;
  using ::lroundf;
  using ::lroundl;

  using ::nan;
  using ::nanf;
  using ::nanl;

  using ::nearbyint;
  using ::nearbyintf;
  using ::nearbyintl;

  using ::nextafter;
  using ::nextafterf;
  using ::nextafterl;

  using ::nexttoward;
  using ::nexttowardf;
  using ::nexttowardl;

  using ::remainder;
  using ::remainderf;
  using ::remainderl;

  using ::remquo;
  using ::remquof;
  using ::remquol;

  using ::rint;
  using ::rintf;
  using ::rintl;

  using ::round;
  using ::roundf;
  using ::roundl;

  using ::scalbln;
  using ::scalblnf;
  using ::scalblnl;

  using ::scalbn;
  using ::scalbnf;
  using ::scalbnl;

  using ::tgamma;
  using ::tgammaf;
  using ::tgammal;

  using ::trunc;
  using ::truncf;
  using ::truncl;


  // acosh overloads: float and long double forward to the matching-precision builtin.
  constexpr float acosh(float __x)
  {
    return __builtin_acoshf(__x);
  }

  constexpr long double acosh(long double __x)
  {
    return __builtin_acoshl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    acosh(_Tp __x)
    {
      return __builtin_acosh(static_cast<double>(__x));
    }


  // asinh overloads: float and long double forward to the matching-precision builtin.
  constexpr float asinh(float __x)
  {
    return __builtin_asinhf(__x);
  }

  constexpr long double asinh(long double __x)
  {
    return __builtin_asinhl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    asinh(_Tp __x)
    {
      return __builtin_asinh(static_cast<double>(__x));
    }


  // atanh overloads: float and long double forward to the matching-precision builtin.
  constexpr float atanh(float __x)
  {
    return __builtin_atanhf(__x);
  }

  constexpr long double atanh(long double __x)
  {
    return __builtin_atanhl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    atanh(_Tp __x)
    {
      return __builtin_atanh(static_cast<double>(__x));
    }


  // cbrt overloads: float and long double forward to the matching-precision builtin.
  constexpr float cbrt(float __x)
  {
    return __builtin_cbrtf(__x);
  }

  constexpr long double cbrt(long double __x)
  {
    return __builtin_cbrtl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    cbrt(_Tp __x)
    {
      return __builtin_cbrt(static_cast<double>(__x));
    }


  // copysign overloads: same-type float / long double pairs go straight to the builtin.
  constexpr float copysign(float __x, float __y)
  {
    return __builtin_copysignf(__x, __y);
  }

  constexpr long double copysign(long double __x, long double __y)
  {
    return __builtin_copysignl(__x, __y);
  }

  // Mixed-type call: convert both operands to the __promote_2 result type, then
  // re-dispatch to the overload for that type.
  template<typename _Tp, typename _Up>
    constexpr
    typename __gnu_cxx::__promote_2<_Tp, _Up>::__type
    copysign(_Tp __x, _Up __y)
    {
      using __promoted = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return copysign(__promoted(__x), __promoted(__y));
    }


  // erf overloads: float and long double forward to the matching-precision builtin.
  constexpr float erf(float __x)
  {
    return __builtin_erff(__x);
  }

  constexpr long double erf(long double __x)
  {
    return __builtin_erfl(__x);
  }

  // Integer arguments (per __is_integer) are converted to double first.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    erf(_Tp __x)
    {
      return __builtin_erf(static_cast<double>(__x));
    }


  // float overload of erfc: thin wrapper over __builtin_erfcf.
  constexpr float erfc(float __x)
  {
    return __builtin_erfcf(__x);
  }

  // long double overload of erfc: thin wrapper over __builtin_erfcl.
  constexpr long double erfc(long double __x)
  {
    return __builtin_erfcl(__x);
  }


  // erfc for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_erfc.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    erfc(_Tp __x)
    {
      return __builtin_erfc(__x);
    }


  // float overload of exp2: thin wrapper over __builtin_exp2f.
  constexpr float exp2(float __x)
  {
    return __builtin_exp2f(__x);
  }

  // long double overload of exp2: thin wrapper over __builtin_exp2l.
  constexpr long double exp2(long double __x)
  {
    return __builtin_exp2l(__x);
  }


  // exp2 for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_exp2.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    exp2(_Tp __x)
    {
      return __builtin_exp2(__x);
    }


  // float overload of expm1: thin wrapper over __builtin_expm1f.
  constexpr float expm1(float __x)
  {
    return __builtin_expm1f(__x);
  }

  // long double overload of expm1: thin wrapper over __builtin_expm1l.
  constexpr long double expm1(long double __x)
  {
    return __builtin_expm1l(__x);
  }


  // expm1 for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_expm1.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    expm1(_Tp __x)
    {
      return __builtin_expm1(__x);
    }


  // float overload of fdim: thin wrapper over __builtin_fdimf.
  constexpr float fdim(float __x, float __y)
  {
    return __builtin_fdimf(__x, __y);
  }

  // long double overload of fdim: thin wrapper over __builtin_fdiml.
  constexpr long double fdim(long double __x, long double __y)
  {
    return __builtin_fdiml(__x, __y);
  }


  // Mixed-type fdim: converts both arguments to the type selected by
  // __gnu_cxx::__promote_2<_Tp, _Up> and re-dispatches on that type.
  template<typename _Tp, typename _Up>
    constexpr typename __gnu_cxx::__promote_2<_Tp, _Up>::__type
    fdim(_Tp __x, _Up __y)
    {
      using __type = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return fdim(static_cast<__type>(__x), static_cast<__type>(__y));
    }


  // float overload of fma: thin wrapper over __builtin_fmaf.
  constexpr float fma(float __x, float __y, float __z)
  {
    return __builtin_fmaf(__x, __y, __z);
  }

  // long double overload of fma: thin wrapper over __builtin_fmal.
  constexpr long double fma(long double __x, long double __y, long double __z)
  {
    return __builtin_fmal(__x, __y, __z);
  }


  // Mixed-type fma: converts all three arguments to the type selected by
  // __gnu_cxx::__promote_3<_Tp, _Up, _Vp> and re-dispatches on that type.
  template<typename _Tp, typename _Up, typename _Vp>
    constexpr typename __gnu_cxx::__promote_3<_Tp, _Up, _Vp>::__type
    fma(_Tp __x, _Up __y, _Vp __z)
    {
      using __type = typename __gnu_cxx::__promote_3<_Tp, _Up, _Vp>::__type;
      return fma(static_cast<__type>(__x), static_cast<__type>(__y),
                 static_cast<__type>(__z));
    }


  // float overload of fmax: thin wrapper over __builtin_fmaxf.
  constexpr float fmax(float __x, float __y)
  {
    return __builtin_fmaxf(__x, __y);
  }

  // long double overload of fmax: thin wrapper over __builtin_fmaxl.
  constexpr long double fmax(long double __x, long double __y)
  {
    return __builtin_fmaxl(__x, __y);
  }


  // Mixed-type fmax: converts both arguments to the type selected by
  // __gnu_cxx::__promote_2<_Tp, _Up> and re-dispatches on that type.
  template<typename _Tp, typename _Up>
    constexpr typename __gnu_cxx::__promote_2<_Tp, _Up>::__type
    fmax(_Tp __x, _Up __y)
    {
      using __type = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return fmax(static_cast<__type>(__x), static_cast<__type>(__y));
    }


  // float overload of fmin: thin wrapper over __builtin_fminf.
  constexpr float fmin(float __x, float __y)
  {
    return __builtin_fminf(__x, __y);
  }

  // long double overload of fmin: thin wrapper over __builtin_fminl.
  constexpr long double fmin(long double __x, long double __y)
  {
    return __builtin_fminl(__x, __y);
  }


  // Mixed-type fmin: converts both arguments to the type selected by
  // __gnu_cxx::__promote_2<_Tp, _Up> and re-dispatches on that type.
  template<typename _Tp, typename _Up>
    constexpr typename __gnu_cxx::__promote_2<_Tp, _Up>::__type
    fmin(_Tp __x, _Up __y)
    {
      using __type = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return fmin(static_cast<__type>(__x), static_cast<__type>(__y));
    }


  // float overload of hypot: thin wrapper over __builtin_hypotf.
  constexpr float hypot(float __x, float __y)
  {
    return __builtin_hypotf(__x, __y);
  }

  // long double overload of hypot: thin wrapper over __builtin_hypotl.
  constexpr long double hypot(long double __x, long double __y)
  {
    return __builtin_hypotl(__x, __y);
  }


  // Mixed-type hypot: converts both arguments to the type selected by
  // __gnu_cxx::__promote_2<_Tp, _Up> and re-dispatches on that type.
  template<typename _Tp, typename _Up>
    constexpr typename __gnu_cxx::__promote_2<_Tp, _Up>::__type
    hypot(_Tp __x, _Up __y)
    {
      using __type = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return hypot(static_cast<__type>(__x), static_cast<__type>(__y));
    }


  // float overload of ilogb: returns the int produced by __builtin_ilogbf.
  constexpr int ilogb(float __x)
  {
    return __builtin_ilogbf(__x);
  }

  // long double overload of ilogb: returns the int produced by
  // __builtin_ilogbl.
  constexpr int ilogb(long double __x)
  {
    return __builtin_ilogbl(__x);
  }


  // ilogb for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the int result of __builtin_ilogb.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, int>::__type
    ilogb(_Tp __x)
    {
      return __builtin_ilogb(__x);
    }


  // float overload of lgamma: thin wrapper over __builtin_lgammaf.
  constexpr float lgamma(float __x)
  {
    return __builtin_lgammaf(__x);
  }

  // long double overload of lgamma: thin wrapper over __builtin_lgammal.
  constexpr long double lgamma(long double __x)
  {
    return __builtin_lgammal(__x);
  }


  // lgamma for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_lgamma.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    lgamma(_Tp __x)
    {
      return __builtin_lgamma(__x);
    }


  // float overload of llrint: returns the long long produced by
  // __builtin_llrintf.
  constexpr long long llrint(float __x)
  {
    return __builtin_llrintf(__x);
  }

  // long double overload of llrint: returns the long long produced by
  // __builtin_llrintl.
  constexpr long long llrint(long double __x)
  {
    return __builtin_llrintl(__x);
  }


  // llrint for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the long long result of __builtin_llrint.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, long long>::__type
    llrint(_Tp __x)
    {
      return __builtin_llrint(__x);
    }


  // float overload of llround: returns the long long produced by
  // __builtin_llroundf.
  constexpr long long llround(float __x)
  {
    return __builtin_llroundf(__x);
  }

  // long double overload of llround: returns the long long produced by
  // __builtin_llroundl.
  constexpr long long llround(long double __x)
  {
    return __builtin_llroundl(__x);
  }


  // llround for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the long long result of __builtin_llround.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, long long>::__type
    llround(_Tp __x)
    {
      return __builtin_llround(__x);
    }


  // float overload of log1p: thin wrapper over __builtin_log1pf.
  constexpr float log1p(float __x)
  {
    return __builtin_log1pf(__x);
  }

  // long double overload of log1p: thin wrapper over __builtin_log1pl.
  constexpr long double log1p(long double __x)
  {
    return __builtin_log1pl(__x);
  }


  // log1p for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_log1p.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    log1p(_Tp __x)
    {
      return __builtin_log1p(__x);
    }


  // float overload of log2: thin wrapper over __builtin_log2f.
  constexpr float log2(float __x)
  {
    return __builtin_log2f(__x);
  }

  // long double overload of log2: thin wrapper over __builtin_log2l.
  constexpr long double log2(long double __x)
  {
    return __builtin_log2l(__x);
  }


  // log2 for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_log2.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    log2(_Tp __x)
    {
      return __builtin_log2(__x);
    }


  // float overload of logb: thin wrapper over __builtin_logbf.
  constexpr float logb(float __x)
  {
    return __builtin_logbf(__x);
  }

  // long double overload of logb: thin wrapper over __builtin_logbl.
  constexpr long double logb(long double __x)
  {
    return __builtin_logbl(__x);
  }


  // logb for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_logb.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    logb(_Tp __x)
    {
      return __builtin_logb(__x);
    }


  // float overload of lrint: returns the long produced by __builtin_lrintf.
  constexpr long lrint(float __x)
  {
    return __builtin_lrintf(__x);
  }

  // long double overload of lrint: returns the long produced by
  // __builtin_lrintl.
  constexpr long lrint(long double __x)
  {
    return __builtin_lrintl(__x);
  }


  // lrint for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the long result of __builtin_lrint.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, long>::__type
    lrint(_Tp __x)
    {
      return __builtin_lrint(__x);
    }


  // float overload of lround: returns the long produced by __builtin_lroundf.
  constexpr long lround(float __x)
  {
    return __builtin_lroundf(__x);
  }

  // long double overload of lround: returns the long produced by
  // __builtin_lroundl.
  constexpr long lround(long double __x)
  {
    return __builtin_lroundl(__x);
  }


  // lround for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the long result of __builtin_lround.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, long>::__type
    lround(_Tp __x)
    {
      return __builtin_lround(__x);
    }


  // float overload of nearbyint: thin wrapper over __builtin_nearbyintf.
  constexpr float nearbyint(float __x)
  {
    return __builtin_nearbyintf(__x);
  }

  // long double overload of nearbyint: thin wrapper over
  // __builtin_nearbyintl.
  constexpr long double nearbyint(long double __x)
  {
    return __builtin_nearbyintl(__x);
  }


  // nearbyint for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_nearbyint.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    nearbyint(_Tp __x)
    {
      return __builtin_nearbyint(__x);
    }


  // float overload of nextafter: thin wrapper over __builtin_nextafterf.
  constexpr float nextafter(float __x, float __y)
  {
    return __builtin_nextafterf(__x, __y);
  }

  // long double overload of nextafter: thin wrapper over
  // __builtin_nextafterl.
  constexpr long double nextafter(long double __x, long double __y)
  {
    return __builtin_nextafterl(__x, __y);
  }


  // Mixed-type nextafter: converts both arguments to the type selected by
  // __gnu_cxx::__promote_2<_Tp, _Up> and re-dispatches on that type.
  template<typename _Tp, typename _Up>
    constexpr typename __gnu_cxx::__promote_2<_Tp, _Up>::__type
    nextafter(_Tp __x, _Up __y)
    {
      using __type = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return nextafter(static_cast<__type>(__x), static_cast<__type>(__y));
    }


  // float overload of nexttoward (the direction argument is long double):
  // thin wrapper over __builtin_nexttowardf.
  constexpr float nexttoward(float __x, long double __y)
  {
    return __builtin_nexttowardf(__x, __y);
  }

  // long double overload of nexttoward: thin wrapper over
  // __builtin_nexttowardl.
  constexpr long double nexttoward(long double __x, long double __y)
  {
    return __builtin_nexttowardl(__x, __y);
  }


  // nexttoward for an integer first argument (gated on
  // __is_integer<_Tp>::__value); yields the double result of
  // __builtin_nexttoward.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    nexttoward(_Tp __x, long double __y)
    {
      return __builtin_nexttoward(__x, __y);
    }


  // float overload of remainder: thin wrapper over __builtin_remainderf.
  constexpr float remainder(float __x, float __y)
  {
    return __builtin_remainderf(__x, __y);
  }

  // long double overload of remainder: thin wrapper over
  // __builtin_remainderl.
  constexpr long double remainder(long double __x, long double __y)
  {
    return __builtin_remainderl(__x, __y);
  }


  // Mixed-type remainder: converts both arguments to the type selected by
  // __gnu_cxx::__promote_2<_Tp, _Up> and re-dispatches on that type.
  template<typename _Tp, typename _Up>
    constexpr typename __gnu_cxx::__promote_2<_Tp, _Up>::__type
    remainder(_Tp __x, _Up __y)
    {
      using __type = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return remainder(static_cast<__type>(__x), static_cast<__type>(__y));
    }


  // float overload of remquo: forwards to __builtin_remquof, passing
  // __pquo through as the builtin's int* argument. Not constexpr, unlike
  // its neighbours.
  inline float remquo(float __x, float __y, int* __pquo)
  {
    return __builtin_remquof(__x, __y, __pquo);
  }

  // long double overload of remquo: forwards to __builtin_remquol, passing
  // __pquo through as the builtin's int* argument.
  inline long double remquo(long double __x, long double __y, int* __pquo)
  {
    return __builtin_remquol(__x, __y, __pquo);
  }


  // Mixed-type remquo: converts the two value arguments to the type selected
  // by __gnu_cxx::__promote_2<_Tp, _Up>, forwards __pquo unchanged, and
  // re-dispatches on the promoted type.
  template<typename _Tp, typename _Up>
    inline typename __gnu_cxx::__promote_2<_Tp, _Up>::__type
    remquo(_Tp __x, _Up __y, int* __pquo)
    {
      using __type = typename __gnu_cxx::__promote_2<_Tp, _Up>::__type;
      return remquo(static_cast<__type>(__x), static_cast<__type>(__y),
                    __pquo);
    }


  // float overload of rint: thin wrapper over __builtin_rintf.
  constexpr float rint(float __x)
  {
    return __builtin_rintf(__x);
  }

  // long double overload of rint: thin wrapper over __builtin_rintl.
  constexpr long double rint(long double __x)
  {
    return __builtin_rintl(__x);
  }


  // rint for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_rint.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    rint(_Tp __x)
    {
      return __builtin_rint(__x);
    }


  // float overload of round: thin wrapper over __builtin_roundf.
  constexpr float round(float __x)
  {
    return __builtin_roundf(__x);
  }

  // long double overload of round: thin wrapper over __builtin_roundl.
  constexpr long double round(long double __x)
  {
    return __builtin_roundl(__x);
  }


  // round for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_round.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    round(_Tp __x)
    {
      return __builtin_round(__x);
    }


  // float overload of scalbln (long exponent): thin wrapper over
  // __builtin_scalblnf.
  constexpr float scalbln(float __x, long __ex)
  {
    return __builtin_scalblnf(__x, __ex);
  }

  // long double overload of scalbln (long exponent): thin wrapper over
  // __builtin_scalblnl.
  constexpr long double scalbln(long double __x, long __ex)
  {
    return __builtin_scalblnl(__x, __ex);
  }


  // scalbln for an integer first argument (gated on
  // __is_integer<_Tp>::__value); yields the double result of
  // __builtin_scalbln.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    scalbln(_Tp __x, long __ex)
    {
      return __builtin_scalbln(__x, __ex);
    }


  // float overload of scalbn (int exponent): thin wrapper over
  // __builtin_scalbnf.
  constexpr float scalbn(float __x, int __ex)
  {
    return __builtin_scalbnf(__x, __ex);
  }

  // long double overload of scalbn (int exponent): thin wrapper over
  // __builtin_scalbnl.
  constexpr long double scalbn(long double __x, int __ex)
  {
    return __builtin_scalbnl(__x, __ex);
  }


  // scalbn for an integer first argument (gated on
  // __is_integer<_Tp>::__value); yields the double result of
  // __builtin_scalbn.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    scalbn(_Tp __x, int __ex)
    {
      return __builtin_scalbn(__x, __ex);
    }


  // float overload of tgamma: thin wrapper over __builtin_tgammaf.
  constexpr float tgamma(float __x)
  {
    return __builtin_tgammaf(__x);
  }

  // long double overload of tgamma: thin wrapper over __builtin_tgammal.
  constexpr long double tgamma(long double __x)
  {
    return __builtin_tgammal(__x);
  }


  // tgamma for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_tgamma.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    tgamma(_Tp __x)
    {
      return __builtin_tgamma(__x);
    }


  // float overload of trunc: thin wrapper over __builtin_truncf.
  constexpr float trunc(float __x)
  {
    return __builtin_truncf(__x);
  }

  // long double overload of trunc: thin wrapper over __builtin_truncl.
  constexpr long double trunc(long double __x)
  {
    return __builtin_truncl(__x);
  }


  // trunc for integer arguments (gated on __is_integer<_Tp>::__value);
  // yields the double result of __builtin_trunc.
  template<typename _Tp>
    constexpr
    typename __gnu_cxx::__enable_if<__is_integer<_Tp>::__value, double>::__type
    trunc(_Tp __x)
    {
      return __builtin_trunc(__x);
    }
# 1932 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 3
}


}
# 42 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cstdlib" 1 3
# 40 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cstdlib" 3
# 121 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cstdlib" 3
extern "C++"
{
namespace std __attribute__ ((__visibility__ ("default")))
{


  using ::div_t;
  using ::ldiv_t;

  using ::abort;


  using ::atexit;


  using ::at_quick_exit;


  using ::atof;
  using ::atoi;
  using ::atol;
  using ::bsearch;
  using ::calloc;
  using ::div;
  using ::exit;
  using ::free;
  using ::getenv;
  using ::labs;
  using ::ldiv;
  using ::malloc;

  using ::mblen;
  using ::mbstowcs;
  using ::mbtowc;

  using ::qsort;


  using ::quick_exit;


  using ::rand;
  using ::realloc;
  using ::srand;
  using ::strtod;
  using ::strtol;
  using ::strtoul;
  using ::system;

  using ::wcstombs;
  using ::wctomb;


  // long overload of div: delegates to ldiv and returns its ldiv_t result.
  inline ldiv_t
  div(long __i, long __j)
  {
    return ldiv(__i, __j);
  }


}
# 195 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cstdlib" 3
namespace __gnu_cxx __attribute__ ((__visibility__ ("default")))
{


  using ::lldiv_t;


  using ::_Exit;


  using ::llabs;

  // long long overload of div: fills an lldiv_t directly with the built-in
  // / and % operators instead of calling lldiv.
  inline lldiv_t
  div(long long __n, long long __d)
  {
    lldiv_t __result;
    __result.quot = __n / __d;
    __result.rem = __n % __d;
    return __result;
  }

  using ::lldiv;
# 227 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cstdlib" 3
  using ::atoll;
  using ::strtoll;
  using ::strtoull;

  using ::strtof;
  using ::strtold;


}

namespace std
{

  using ::__gnu_cxx::lldiv_t;

  using ::__gnu_cxx::_Exit;

  using ::__gnu_cxx::llabs;
  using ::__gnu_cxx::div;
  using ::__gnu_cxx::lldiv;

  using ::__gnu_cxx::atoll;
  using ::__gnu_cxx::strtof;
  using ::__gnu_cxx::strtoll;
  using ::__gnu_cxx::strtoull;
  using ::__gnu_cxx::strtold;
}


}
# 43 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/stdlib.h" 1 3
# 36 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/stdlib.h" 3
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cstdlib" 1 3
# 40 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cstdlib" 3
# 37 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/stdlib.h" 2 3

using std::abort;
using std::atexit;
using std::exit;


  using std::at_quick_exit;


  using std::quick_exit;


using std::div_t;
using std::ldiv_t;

using std::abs;
using std::atof;
using std::atoi;
using std::atol;
using std::bsearch;
using std::calloc;
using std::div;
using std::free;
using std::getenv;
using std::labs;
using std::ldiv;
using std::malloc;

using std::mblen;
using std::mbstowcs;
using std::mbtowc;

using std::qsort;
using std::rand;
using std::realloc;
using std::srand;
using std::strtod;
using std::strtol;
using std::strtoul;
using std::system;

using std::wcstombs;
using std::wctomb;
# 44 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 1 "/usr/include/string.h" 1 3 4
# 26 "/usr/include/string.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/libc-header-start.h" 1 3 4
# 27 "/usr/include/string.h" 2 3 4

extern "C" {


# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stddef.h" 1 3 4
# 34 "/usr/include/string.h" 2 3 4
# 43 "/usr/include/string.h" 3 4
extern void *memcpy (void *__restrict __dest, const void *__restrict __src,
       size_t __n) noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern void *memmove (void *__dest, const void *__src, size_t __n)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern void *memccpy (void *__restrict __dest, const void *__restrict __src,
        int __c, size_t __n)
    noexcept (true) __attribute__ ((__nonnull__ (1, 2))) ;


extern void *memset (void *__s, int __c, size_t __n) noexcept (true) __attribute__ ((__nonnull__ (1)));


extern int memcmp (const void *__s1, const void *__s2, size_t __n)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));
# 80 "/usr/include/string.h" 3 4
extern int __memcmpeq (const void *__s1, const void *__s2, size_t __n)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));


extern "C++"
{
extern void *memchr (void *__s, int __c, size_t __n)
      noexcept (true) __asm ("memchr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));
extern const void *memchr (const void *__s, int __c, size_t __n)
      noexcept (true) __asm ("memchr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));
# 105 "/usr/include/string.h" 3 4
}
# 115 "/usr/include/string.h" 3 4
extern "C++" void *rawmemchr (void *__s, int __c)
     noexcept (true) __asm ("rawmemchr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));
extern "C++" const void *rawmemchr (const void *__s, int __c)
     noexcept (true) __asm ("rawmemchr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));


extern "C++" void *memrchr (void *__s, int __c, size_t __n)
      noexcept (true) __asm ("memrchr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)))
                                           ;
extern "C++" const void *memrchr (const void *__s, int __c, size_t __n)
      noexcept (true) __asm ("memrchr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)))
                                           ;
# 141 "/usr/include/string.h" 3 4
extern char *strcpy (char *__restrict __dest, const char *__restrict __src)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));

extern char *strncpy (char *__restrict __dest,
        const char *__restrict __src, size_t __n)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern char *strcat (char *__restrict __dest, const char *__restrict __src)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));

extern char *strncat (char *__restrict __dest, const char *__restrict __src,
        size_t __n) noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern int strcmp (const char *__s1, const char *__s2)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));

extern int strncmp (const char *__s1, const char *__s2, size_t __n)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));


extern int strcoll (const char *__s1, const char *__s2)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));

extern size_t strxfrm (char *__restrict __dest,
         const char *__restrict __src, size_t __n)
    noexcept (true) __attribute__ ((__nonnull__ (2))) ;


extern int strcoll_l (const char *__s1, const char *__s2, locale_t __l)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2, 3)));


extern size_t strxfrm_l (char *__dest, const char *__src, size_t __n,
    locale_t __l) noexcept (true) __attribute__ ((__nonnull__ (2, 4)))
                                           ;


extern char *strdup (const char *__s)
     noexcept (true) __attribute__ ((__malloc__)) __attribute__ ((__nonnull__ (1)));


extern char *strndup (const char *__string, size_t __n)
     noexcept (true) __attribute__ ((__malloc__)) __attribute__ ((__nonnull__ (1)));
# 224 "/usr/include/string.h" 3 4
extern "C++"
{
extern char *strchr (char *__s, int __c)
     noexcept (true) __asm ("strchr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));
extern const char *strchr (const char *__s, int __c)
     noexcept (true) __asm ("strchr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));
# 244 "/usr/include/string.h" 3 4
}


extern "C++"
{
extern char *strrchr (char *__s, int __c)
     noexcept (true) __asm ("strrchr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));
extern const char *strrchr (const char *__s, int __c)
     noexcept (true) __asm ("strrchr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));
# 271 "/usr/include/string.h" 3 4
}
# 281 "/usr/include/string.h" 3 4
extern "C++" char *strchrnul (char *__s, int __c)
     noexcept (true) __asm ("strchrnul") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));
extern "C++" const char *strchrnul (const char *__s, int __c)
     noexcept (true) __asm ("strchrnul") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));
# 293 "/usr/include/string.h" 3 4
extern size_t strcspn (const char *__s, const char *__reject)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));


extern size_t strspn (const char *__s, const char *__accept)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));


extern "C++"
{
extern char *strpbrk (char *__s, const char *__accept)
     noexcept (true) __asm ("strpbrk") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));
extern const char *strpbrk (const char *__s, const char *__accept)
     noexcept (true) __asm ("strpbrk") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));
# 321 "/usr/include/string.h" 3 4
}


extern "C++"
{
extern char *strstr (char *__haystack, const char *__needle)
     noexcept (true) __asm ("strstr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));
extern const char *strstr (const char *__haystack, const char *__needle)
     noexcept (true) __asm ("strstr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));
# 348 "/usr/include/string.h" 3 4
}


extern char *strtok (char *__restrict __s, const char *__restrict __delim)
     noexcept (true) __attribute__ ((__nonnull__ (2)));


extern char *__strtok_r (char *__restrict __s,
    const char *__restrict __delim,
    char **__restrict __save_ptr)
     noexcept (true) __attribute__ ((__nonnull__ (2, 3)));

extern char *strtok_r (char *__restrict __s, const char *__restrict __delim,
         char **__restrict __save_ptr)
     noexcept (true) __attribute__ ((__nonnull__ (2, 3)));


extern "C++" char *strcasestr (char *__haystack, const char *__needle)
     noexcept (true) __asm ("strcasestr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));
extern "C++" const char *strcasestr (const char *__haystack,
         const char *__needle)
     noexcept (true) __asm ("strcasestr") __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));
# 389 "/usr/include/string.h" 3 4
extern void *memmem (const void *__haystack, size_t __haystacklen,
       const void *__needle, size_t __needlelen)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 3)))

                                         ;


extern void *__mempcpy (void *__restrict __dest,
   const void *__restrict __src, size_t __n)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));
extern void *mempcpy (void *__restrict __dest,
        const void *__restrict __src, size_t __n)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern size_t strlen (const char *__s)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));


extern size_t strnlen (const char *__string, size_t __maxlen)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));


extern char *strerror (int __errnum) noexcept (true);
# 444 "/usr/include/string.h" 3 4
extern char *strerror_r (int __errnum, char *__buf, size_t __buflen)
     noexcept (true) __attribute__ ((__nonnull__ (2))) ;


extern const char *strerrordesc_np (int __err) noexcept (true);

extern const char *strerrorname_np (int __err) noexcept (true);


extern char *strerror_l (int __errnum, locale_t __l) noexcept (true);


# 1 "/usr/include/strings.h" 1 3 4
# 23 "/usr/include/strings.h" 3 4
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stddef.h" 1 3 4
# 24 "/usr/include/strings.h" 2 3 4


extern "C" {


extern int bcmp (const void *__s1, const void *__s2, size_t __n)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));


extern void bcopy (const void *__src, void *__dest, size_t __n)
  noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern void bzero (void *__s, size_t __n) noexcept (true) __attribute__ ((__nonnull__ (1)));
# 68 "/usr/include/strings.h" 3 4
extern char *index (const char *__s, int __c)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));
# 96 "/usr/include/strings.h" 3 4
extern char *rindex (const char *__s, int __c)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1)));


extern int ffs (int __i) noexcept (true) __attribute__ ((__const__));


extern int ffsl (long int __l) noexcept (true) __attribute__ ((__const__));
__extension__ extern int ffsll (long long int __ll)
     noexcept (true) __attribute__ ((__const__));


extern int strcasecmp (const char *__s1, const char *__s2)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));


extern int strncasecmp (const char *__s1, const char *__s2, size_t __n)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));


extern int strcasecmp_l (const char *__s1, const char *__s2, locale_t __loc)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2, 3)));


extern int strncasecmp_l (const char *__s1, const char *__s2,
     size_t __n, locale_t __loc)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2, 4)));


}
# 463 "/usr/include/string.h" 2 3 4


extern void explicit_bzero (void *__s, size_t __n) noexcept (true) __attribute__ ((__nonnull__ (1)))
                                                  ;


extern char *strsep (char **__restrict __stringp,
       const char *__restrict __delim)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern char *strsignal (int __sig) noexcept (true);


extern const char *sigabbrev_np (int __sig) noexcept (true);


extern const char *sigdescr_np (int __sig) noexcept (true);


extern char *__stpcpy (char *__restrict __dest, const char *__restrict __src)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));
extern char *stpcpy (char *__restrict __dest, const char *__restrict __src)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern char *__stpncpy (char *__restrict __dest,
   const char *__restrict __src, size_t __n)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));
extern char *stpncpy (char *__restrict __dest,
        const char *__restrict __src, size_t __n)
     noexcept (true) __attribute__ ((__nonnull__ (1, 2)));


extern int strverscmp (const char *__s1, const char *__s2)
     noexcept (true) __attribute__ ((__pure__)) __attribute__ ((__nonnull__ (1, 2)));


extern char *strfry (char *__string) noexcept (true) __attribute__ ((__nonnull__ (1)));


extern void *memfrob (void *__s, size_t __n) noexcept (true) __attribute__ ((__nonnull__ (1)))
                                          ;


extern "C++" char *basename (char *__filename)
     noexcept (true) __asm ("basename") __attribute__ ((__nonnull__ (1)));
extern "C++" const char *basename (const char *__filename)
     noexcept (true) __asm ("basename") __attribute__ ((__nonnull__ (1)));
# 539 "/usr/include/string.h" 3 4
}
# 45 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 55 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/local/cuda-11.7/include/cuda.h" 1 3
# 55 "/usr/local/cuda-11.7/include/cuda.h" 3
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/stdlib.h" 1 3
# 56 "/usr/local/cuda-11.7/include/cuda.h" 2 3


# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stdint.h" 1 3
# 52 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stdint.h" 3
# 1 "/usr/include/stdint.h" 1 3 4
# 26 "/usr/include/stdint.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/libc-header-start.h" 1 3 4
# 27 "/usr/include/stdint.h" 2 3 4

# 1 "/usr/include/x86_64-linux-gnu/bits/wchar.h" 1 3 4
# 29 "/usr/include/stdint.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 1 3 4
# 30 "/usr/include/stdint.h" 2 3 4


# 1 "/usr/include/x86_64-linux-gnu/bits/stdint-uintn.h" 1 3 4
# 24 "/usr/include/x86_64-linux-gnu/bits/stdint-uintn.h" 3 4
typedef __uint8_t uint8_t;
typedef __uint16_t uint16_t;
typedef __uint32_t uint32_t;
typedef __uint64_t uint64_t;
# 38 "/usr/include/stdint.h" 2 3 4


typedef __int_least8_t int_least8_t;
typedef __int_least16_t int_least16_t;
typedef __int_least32_t int_least32_t;
typedef __int_least64_t int_least64_t;


typedef __uint_least8_t uint_least8_t;
typedef __uint_least16_t uint_least16_t;
typedef __uint_least32_t uint_least32_t;
typedef __uint_least64_t uint_least64_t;


typedef signed char int_fast8_t;

typedef long int int_fast16_t;
typedef long int int_fast32_t;
typedef long int int_fast64_t;
# 71 "/usr/include/stdint.h" 3 4
typedef unsigned char uint_fast8_t;

typedef unsigned long int uint_fast16_t;
typedef unsigned long int uint_fast32_t;
typedef unsigned long int uint_fast64_t;
# 87 "/usr/include/stdint.h" 3 4
typedef long int intptr_t;


typedef unsigned long int uintptr_t;
# 101 "/usr/include/stdint.h" 3 4
typedef __intmax_t intmax_t;
typedef __uintmax_t uintmax_t;
# 53 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stdint.h" 2 3
# 61 "/usr/local/cuda-11.7/include/cuda.h" 2 3
typedef uint32_t cuuint32_t;
typedef uint64_t cuuint64_t;
# 247 "/usr/local/cuda-11.7/include/cuda.h" 3
extern "C" {


typedef unsigned long long CUdeviceptr_v2;


typedef CUdeviceptr_v2 CUdeviceptr;

typedef int CUdevice_v1;
typedef CUdevice_v1 CUdevice;
typedef struct CUctx_st *CUcontext;
typedef struct CUmod_st *CUmodule;
typedef struct CUfunc_st *CUfunction;
typedef struct CUarray_st *CUarray;
typedef struct CUmipmappedArray_st *CUmipmappedArray;
typedef struct CUtexref_st *CUtexref;
typedef struct CUsurfref_st *CUsurfref;
typedef struct CUevent_st *CUevent;
typedef struct CUstream_st *CUstream;
typedef struct CUgraphicsResource_st *CUgraphicsResource;
typedef unsigned long long CUtexObject_v1;
typedef CUtexObject_v1 CUtexObject;
typedef unsigned long long CUsurfObject_v1;
typedef CUsurfObject_v1 CUsurfObject;
typedef struct CUextMemory_st *CUexternalMemory;
typedef struct CUextSemaphore_st *CUexternalSemaphore;
typedef struct CUgraph_st *CUgraph;
typedef struct CUgraphNode_st *CUgraphNode;
typedef struct CUgraphExec_st *CUgraphExec;
typedef struct CUmemPoolHandle_st *CUmemoryPool;
typedef struct CUuserObject_st *CUuserObject;


// 16-byte identifier stored as a raw char array (a UUID, judging by the
// type name).
typedef struct CUuuid_st {
    char bytes[16];
} CUuuid;
# 300 "/usr/local/cuda-11.7/include/cuda.h" 3
// Fixed-size 64-byte blob; the single member is named `reserved`, so the
// contents are presumably opaque to callers — confirm against the CUDA
// driver API documentation.
typedef struct CUipcEventHandle_st {
    char reserved[64];
} CUipcEventHandle_v1;
typedef CUipcEventHandle_v1 CUipcEventHandle;


// Fixed-size 64-byte blob with the same layout as CUipcEventHandle_st; the
// single member is named `reserved`, so the contents are presumably opaque
// to callers — confirm against the CUDA driver API documentation.
typedef struct CUipcMemHandle_st {
    char reserved[64];
} CUipcMemHandle_v1;
typedef CUipcMemHandle_v1 CUipcMemHandle;


// Flag set for IPC memory handles; only one bit (0x1) is defined here.
typedef enum CUipcMem_flags_enum {
    CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1
} CUipcMem_flags;


// Memory-attach flags. Each enumerator occupies its own bit (0x1, 0x2, 0x4).
typedef enum CUmemAttach_flags_enum {
    CU_MEM_ATTACH_GLOBAL = 0x1,
    CU_MEM_ATTACH_HOST = 0x2,
    CU_MEM_ATTACH_SINGLE = 0x4
} CUmemAttach_flags;


// Context creation flags.
//  - The CU_CTX_SCHED_* values occupy the low three bits; CU_CTX_SCHED_MASK
//    (0x07) covers exactly those bits.
//  - CU_CTX_BLOCKING_SYNC has the same value (0x04) as
//    CU_CTX_SCHED_BLOCKING_SYNC, i.e. it is an alias.
//  - CU_CTX_FLAGS_MASK (0x1f) covers every bit defined in this enum.
typedef enum CUctx_flags_enum {
    CU_CTX_SCHED_AUTO = 0x00,
    CU_CTX_SCHED_SPIN = 0x01,
    CU_CTX_SCHED_YIELD = 0x02,
    CU_CTX_SCHED_BLOCKING_SYNC = 0x04,
    CU_CTX_BLOCKING_SYNC = 0x04,


    CU_CTX_SCHED_MASK = 0x07,
    CU_CTX_MAP_HOST = 0x08,


    CU_CTX_LMEM_RESIZE_TO_MAX = 0x10,
    CU_CTX_FLAGS_MASK = 0x1f
} CUctx_flags;


// Stream creation flags: the default (0x0) and a single non-blocking bit
// (0x1).
typedef enum CUstream_flags_enum {
    CU_STREAM_DEFAULT = 0x0,
    CU_STREAM_NON_BLOCKING = 0x1
} CUstream_flags;
# 380 "/usr/local/cuda-11.7/include/cuda.h" 3
// Event creation flags. The non-default enumerators each occupy their own
// bit (0x1, 0x2, 0x4).
typedef enum CUevent_flags_enum {
    CU_EVENT_DEFAULT = 0x0,
    CU_EVENT_BLOCKING_SYNC = 0x1,
    CU_EVENT_DISABLE_TIMING = 0x2,
    CU_EVENT_INTERPROCESS = 0x4
} CUevent_flags;


// Event-record flags: the default (0x0) and one "external" bit (0x1).
typedef enum CUevent_record_flags_enum {
    CU_EVENT_RECORD_DEFAULT = 0x0,
    CU_EVENT_RECORD_EXTERNAL = 0x1


} CUevent_record_flags;


// Event-wait flags: the default (0x0) and one "external" bit (0x1). The
// values mirror CUevent_record_flags.
typedef enum CUevent_wait_flags_enum {
    CU_EVENT_WAIT_DEFAULT = 0x0,
    CU_EVENT_WAIT_EXTERNAL = 0x1


} CUevent_wait_flags;


// Flags for stream wait-value operations. The low values 0x0-0x3 select one
// of four comparison kinds (GEQ, EQ, AND, NOR); CU_STREAM_WAIT_VALUE_FLUSH
// is a separate high bit (bit 30).
typedef enum CUstreamWaitValue_flags_enum {
    CU_STREAM_WAIT_VALUE_GEQ = 0x0,


    CU_STREAM_WAIT_VALUE_EQ = 0x1,
    CU_STREAM_WAIT_VALUE_AND = 0x2,
    CU_STREAM_WAIT_VALUE_NOR = 0x3,


    CU_STREAM_WAIT_VALUE_FLUSH = 1<<30
# 428 "/usr/local/cuda-11.7/include/cuda.h" 3
} CUstreamWaitValue_flags;


/* Flag values for stream write-value operations: 0 for the default, 1 for
 * NO_MEMORY_BARRIER. */
enum CUstreamWriteValue_flags_enum {
    CU_STREAM_WRITE_VALUE_DEFAULT           = 0,
    CU_STREAM_WRITE_VALUE_NO_MEMORY_BARRIER = 1
};
typedef enum CUstreamWriteValue_flags_enum CUstreamWriteValue_flags;


/* Operation codes for batched stream memory operations.
 * Listed here in ascending numeric order; every value is explicit, so the
 * listing order has no effect on the codes. */
enum CUstreamBatchMemOpType_enum {
    CU_STREAM_MEM_OP_WAIT_VALUE_32       = 1,
    CU_STREAM_MEM_OP_WRITE_VALUE_32      = 2,
    CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES = 3,
    CU_STREAM_MEM_OP_WAIT_VALUE_64       = 4,
    CU_STREAM_MEM_OP_WRITE_VALUE_64      = 5,
    CU_STREAM_MEM_OP_BARRIER             = 6
};
typedef enum CUstreamBatchMemOpType_enum CUstreamBatchMemOpType;


/* Memory-barrier type codes: 0 for SYS, 1 for GPU. */
enum CUstreamMemoryBarrier_flags_enum {
    CU_STREAM_MEMORY_BARRIER_TYPE_SYS = 0,
    CU_STREAM_MEMORY_BARRIER_TYPE_GPU = 1
};
typedef enum CUstreamMemoryBarrier_flags_enum CUstreamMemoryBarrier_flags;


/* Parameter record for one batched stream memory operation.
 * Each member struct begins with a CUstreamBatchMemOpType `operation` field,
 * and the union also exposes `operation` directly, so the operation code is
 * the leading member whichever view is used.
 * CUstreamBatchMemOpParams is a plain alias of the _v1 name. */
typedef union CUstreamBatchMemOpParams_union {
    CUstreamBatchMemOpType operation;
    /* View used for wait-value operations (per the member name). */
    struct CUstreamMemOpWaitValueParams_st {
        CUstreamBatchMemOpType operation;
        CUdeviceptr address;
        /* Anonymous union: `value` and `value64` share the same storage. */
        union {
            cuuint32_t value;
            cuuint64_t value64;
        };
        unsigned int flags;
        CUdeviceptr alias;
    } waitValue;
    /* View used for write-value operations; same member list as waitValue. */
    struct CUstreamMemOpWriteValueParams_st {
        CUstreamBatchMemOpType operation;
        CUdeviceptr address;
        /* Anonymous union: `value` and `value64` share the same storage. */
        union {
            cuuint32_t value;
            cuuint64_t value64;
        };
        unsigned int flags;
        CUdeviceptr alias;
    } writeValue;
    /* View carrying only the operation code and a flags word. */
    struct CUstreamMemOpFlushRemoteWritesParams_st {
        CUstreamBatchMemOpType operation;
        unsigned int flags;
    } flushRemoteWrites;

    /* View carrying only the operation code and a flags word. */
    struct CUstreamMemOpMemoryBarrierParams_st {
        CUstreamBatchMemOpType operation;
        unsigned int flags;
    } memoryBarrier;

    /* Six cuuint64_t words.
     * NOTE(review): presumably here to fix the overall size of the union -
     * confirm against the driver API documentation. */
    cuuint64_t pad[6];
} CUstreamBatchMemOpParams_v1;
typedef CUstreamBatchMemOpParams_v1 CUstreamBatchMemOpParams;


/* Parameters for a batch-mem-op node (per the struct name): a context handle,
 * a pointer to CUstreamBatchMemOpParams records together with a `count`, and
 * a flags word.
 * NOTE(review): presumably `count` is the number of elements behind
 * `paramArray` - confirm against the driver API documentation. */
typedef struct CUDA_BATCH_MEM_OP_NODE_PARAMS_st {
    CUcontext ctx;
    unsigned int count;
    CUstreamBatchMemOpParams *paramArray;
    unsigned int flags;
} CUDA_BATCH_MEM_OP_NODE_PARAMS;


/* Occupancy flag values: 0 for the default, 1 for
 * DISABLE_CACHING_OVERRIDE. */
enum CUoccupancy_flags_enum {
    CU_OCCUPANCY_DEFAULT                  = 0,
    CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE = 1
};
typedef enum CUoccupancy_flags_enum CUoccupancy_flags;


/* Selector between the two capture-dependency update modes:
 * ADD is 0 and SET is 1. */
enum CUstreamUpdateCaptureDependencies_flags_enum {
    CU_STREAM_ADD_CAPTURE_DEPENDENCIES = 0,
    CU_STREAM_SET_CAPTURE_DEPENDENCIES = 1
};
typedef enum CUstreamUpdateCaptureDependencies_flags_enum CUstreamUpdateCaptureDependencies_flags;


/* Array element format codes. Every value is explicit.
 * The listing is not in ascending numeric order: the 0xc0-0xcb UNORM/SNORM
 * codes appear before the 0x91-0x9e BC* codes, and there are unassigned gaps
 * between the groups. */
typedef enum CUarray_format_enum {
    CU_AD_FORMAT_UNSIGNED_INT8 = 0x01,
    CU_AD_FORMAT_UNSIGNED_INT16 = 0x02,
    CU_AD_FORMAT_UNSIGNED_INT32 = 0x03,
    CU_AD_FORMAT_SIGNED_INT8 = 0x08,
    CU_AD_FORMAT_SIGNED_INT16 = 0x09,
    CU_AD_FORMAT_SIGNED_INT32 = 0x0a,
    CU_AD_FORMAT_HALF = 0x10,
    CU_AD_FORMAT_FLOAT = 0x20,
    CU_AD_FORMAT_NV12 = 0xb0,
    CU_AD_FORMAT_UNORM_INT8X1 = 0xc0,
    CU_AD_FORMAT_UNORM_INT8X2 = 0xc1,
    CU_AD_FORMAT_UNORM_INT8X4 = 0xc2,
    CU_AD_FORMAT_UNORM_INT16X1 = 0xc3,
    CU_AD_FORMAT_UNORM_INT16X2 = 0xc4,
    CU_AD_FORMAT_UNORM_INT16X4 = 0xc5,
    CU_AD_FORMAT_SNORM_INT8X1 = 0xc6,
    CU_AD_FORMAT_SNORM_INT8X2 = 0xc7,
    CU_AD_FORMAT_SNORM_INT8X4 = 0xc8,
    CU_AD_FORMAT_SNORM_INT16X1 = 0xc9,
    CU_AD_FORMAT_SNORM_INT16X2 = 0xca,
    CU_AD_FORMAT_SNORM_INT16X4 = 0xcb,
    CU_AD_FORMAT_BC1_UNORM = 0x91,
    CU_AD_FORMAT_BC1_UNORM_SRGB = 0x92,
    CU_AD_FORMAT_BC2_UNORM = 0x93,
    CU_AD_FORMAT_BC2_UNORM_SRGB = 0x94,
    CU_AD_FORMAT_BC3_UNORM = 0x95,
    CU_AD_FORMAT_BC3_UNORM_SRGB = 0x96,
    CU_AD_FORMAT_BC4_UNORM = 0x97,
    CU_AD_FORMAT_BC4_SNORM = 0x98,
    CU_AD_FORMAT_BC5_UNORM = 0x99,
    CU_AD_FORMAT_BC5_SNORM = 0x9a,
    CU_AD_FORMAT_BC6H_UF16 = 0x9b,
    CU_AD_FORMAT_BC6H_SF16 = 0x9c,
    CU_AD_FORMAT_BC7_UNORM = 0x9d,
    CU_AD_FORMAT_BC7_UNORM_SRGB = 0x9e
} CUarray_format;


/* Texture address-mode codes, numbered 0..3. */
enum CUaddress_mode_enum {
    CU_TR_ADDRESS_MODE_WRAP   = 0,
    CU_TR_ADDRESS_MODE_CLAMP  = 1,
    CU_TR_ADDRESS_MODE_MIRROR = 2,
    CU_TR_ADDRESS_MODE_BORDER = 3
};
typedef enum CUaddress_mode_enum CUaddress_mode;


/* Texture filter-mode codes: POINT is 0, LINEAR is 1. */
enum CUfilter_mode_enum {
    CU_TR_FILTER_MODE_POINT  = 0,
    CU_TR_FILTER_MODE_LINEAR = 1
};
typedef enum CUfilter_mode_enum CUfilter_mode;


/* Device attribute identifiers.
 * Notes that follow directly from the listing:
 * - Several identifiers are second names for an existing value (8, 12,
 *   27-29 and 102 each appear twice).
 * - Value 120 is not assigned to any enumerator here.
 * - CU_DEVICE_ATTRIBUTE_MAX has no initializer, so it takes the previous
 *   value plus one (125). */
typedef enum CUdevice_attribute_enum {
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,
    /* Same value as CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK. */
    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,
    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,
    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,
    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,
    /* Same value as CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK. */
    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,
    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,
    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,
    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,
    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,
    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_WIDTH = 27,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_HEIGHT = 28,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LAYERED_LAYERS = 29,
    /* The three ARRAY names below repeat values 27-29 of the LAYERED names. */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,
    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,
    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,
    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,
    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,
    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,
    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35,
    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,
    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,
    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,
    CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,
    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43,
    CU_DEVICE_ATTRIBUTE_CAN_TEX2D_GATHER = 44,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_WIDTH = 45,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_GATHER_HEIGHT = 46,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH_ALTERNATE = 47,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT_ALTERNATE = 48,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH_ALTERNATE = 49,
    CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50,
    CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_WIDTH = 52,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_WIDTH = 53,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURECUBEMAP_LAYERED_LAYERS = 54,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_WIDTH = 55,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_WIDTH = 56,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_HEIGHT = 57,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_WIDTH = 58,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_HEIGHT = 59,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE3D_DEPTH = 60,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_WIDTH = 61,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE1D_LAYERED_LAYERS = 62,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_WIDTH = 63,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_HEIGHT = 64,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACE2D_LAYERED_LAYERS = 65,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_WIDTH = 66,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_WIDTH = 67,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_SURFACECUBEMAP_LAYERED_LAYERS = 68,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LINEAR_WIDTH = 69,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_WIDTH = 70,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_HEIGHT = 71,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_LINEAR_PITCH = 72,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_WIDTH = 73,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_MIPMAPPED_HEIGHT = 74,
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
    CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_MIPMAPPED_WIDTH = 77,
    CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78,
    CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79,
    CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80,
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81,
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82,
    CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83,
    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD = 84,
    CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85,
    CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86,
    CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87,
    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88,
    CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89,
    CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90,
    CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91,
    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_MEM_OPS = 92,
    CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS = 93,
    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR = 94,
    CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95,
    CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96,
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97,
    CU_DEVICE_ATTRIBUTE_CAN_FLUSH_REMOTE_WRITES = 98,
    CU_DEVICE_ATTRIBUTE_HOST_REGISTER_SUPPORTED = 99,
    CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS_USES_HOST_PAGE_TABLES = 100,
    CU_DEVICE_ATTRIBUTE_DIRECT_MANAGED_MEM_ACCESS_FROM_HOST = 101,
    CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED = 102,
    /* Same value as CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED. */
    CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102,
    CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
    CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
    CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
    CU_DEVICE_ATTRIBUTE_MAX_BLOCKS_PER_MULTIPROCESSOR = 106,
    CU_DEVICE_ATTRIBUTE_GENERIC_COMPRESSION_SUPPORTED = 107,
    CU_DEVICE_ATTRIBUTE_MAX_PERSISTING_L2_CACHE_SIZE = 108,
    CU_DEVICE_ATTRIBUTE_MAX_ACCESS_POLICY_WINDOW_SIZE = 109,
    CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WITH_CUDA_VMM_SUPPORTED = 110,
    CU_DEVICE_ATTRIBUTE_RESERVED_SHARED_MEMORY_PER_BLOCK = 111,
    CU_DEVICE_ATTRIBUTE_SPARSE_CUDA_ARRAY_SUPPORTED = 112,
    CU_DEVICE_ATTRIBUTE_READ_ONLY_HOST_REGISTER_SUPPORTED = 113,
    CU_DEVICE_ATTRIBUTE_TIMELINE_SEMAPHORE_INTEROP_SUPPORTED = 114,
    CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
    CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED = 116,
    CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS = 117,
    CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_WRITES_ORDERING = 118,
    CU_DEVICE_ATTRIBUTE_MEMPOOL_SUPPORTED_HANDLE_TYPES = 119,


    /* Value 120 is skipped: the next explicit value is 121. */
    CU_DEVICE_ATTRIBUTE_DEFERRED_MAPPING_CUDA_ARRAY_SUPPORTED = 121,


    CU_DEVICE_ATTRIBUTE_CAN_USE_64_BIT_STREAM_MEM_OPS_V2 = 122,
    CU_DEVICE_ATTRIBUTE_CAN_USE_STREAM_WAIT_VALUE_NOR_V2 = 123,


    CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED = 124,

    /* No initializer: takes the previous value plus one (125). */
    CU_DEVICE_ATTRIBUTE_MAX
} CUdevice_attribute;


/* Device property record made up entirely of `int` fields; the two array
 * members hold three values each.
 * NOTE(review): the three-element arrays presumably hold per-dimension (x, y,
 * z) limits - confirm against the driver API documentation.
 * CUdevprop is a plain alias of the _v1 name. */
typedef struct CUdevprop_st {
    int maxThreadsPerBlock;
    int maxThreadsDim[3];
    int maxGridSize[3];
    int sharedMemPerBlock;
    int totalConstantMemory;
    int SIMDWidth;
    int memPitch;
    int regsPerBlock;
    int clockRate;
    int textureAlign;
} CUdevprop_v1;
typedef CUdevprop_v1 CUdevprop;


/* Pointer attribute identifiers, numbered contiguously from 1 to 20. */
enum CUpointer_attribute_enum {
    CU_POINTER_ATTRIBUTE_CONTEXT                    = 1,
    CU_POINTER_ATTRIBUTE_MEMORY_TYPE                = 2,
    CU_POINTER_ATTRIBUTE_DEVICE_POINTER             = 3,
    CU_POINTER_ATTRIBUTE_HOST_POINTER               = 4,
    CU_POINTER_ATTRIBUTE_P2P_TOKENS                 = 5,
    CU_POINTER_ATTRIBUTE_SYNC_MEMOPS                = 6,
    CU_POINTER_ATTRIBUTE_BUFFER_ID                  = 7,
    CU_POINTER_ATTRIBUTE_IS_MANAGED                 = 8,
    CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL             = 9,
    CU_POINTER_ATTRIBUTE_IS_LEGACY_CUDA_IPC_CAPABLE = 10,
    CU_POINTER_ATTRIBUTE_RANGE_START_ADDR           = 11,
    CU_POINTER_ATTRIBUTE_RANGE_SIZE                 = 12,
    CU_POINTER_ATTRIBUTE_MAPPED                     = 13,
    CU_POINTER_ATTRIBUTE_ALLOWED_HANDLE_TYPES       = 14,
    CU_POINTER_ATTRIBUTE_IS_GPU_DIRECT_RDMA_CAPABLE = 15,
    CU_POINTER_ATTRIBUTE_ACCESS_FLAGS               = 16,
    CU_POINTER_ATTRIBUTE_MEMPOOL_HANDLE             = 17,
    CU_POINTER_ATTRIBUTE_MAPPING_SIZE               = 18,
    CU_POINTER_ATTRIBUTE_MAPPING_BASE_ADDR          = 19,
    CU_POINTER_ATTRIBUTE_MEMORY_BLOCK_ID            = 20
};
typedef enum CUpointer_attribute_enum CUpointer_attribute;


/* Function attribute identifiers, numbered 0..9 explicitly.
 * CU_FUNC_ATTRIBUTE_MAX has no initializer, so it takes the previous value
 * plus one (10). The `# N "file"` lines are preprocessor line markers and
 * must stay in place. */
typedef enum CUfunction_attribute_enum {


    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0,


    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1,


    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2,


    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3,


    CU_FUNC_ATTRIBUTE_NUM_REGS = 4,
# 824 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_FUNC_ATTRIBUTE_PTX_VERSION = 5,
# 833 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6,


    CU_FUNC_ATTRIBUTE_CACHE_MODE_CA = 7,


    CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES = 8,
# 856 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 9,
# 929 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_FUNC_ATTRIBUTE_MAX
} CUfunction_attribute;


/* Function cache preference codes, numbered 0..3. */
enum CUfunc_cache_enum {
    CU_FUNC_CACHE_PREFER_NONE   = 0,
    CU_FUNC_CACHE_PREFER_SHARED = 1,
    CU_FUNC_CACHE_PREFER_L1     = 2,
    CU_FUNC_CACHE_PREFER_EQUAL  = 3
};
typedef enum CUfunc_cache_enum CUfunc_cache;


/* Shared-memory bank-size configuration codes, numbered 0..2. */
enum CUsharedconfig_enum {
    CU_SHARED_MEM_CONFIG_DEFAULT_BANK_SIZE    = 0,
    CU_SHARED_MEM_CONFIG_FOUR_BYTE_BANK_SIZE  = 1,
    CU_SHARED_MEM_CONFIG_EIGHT_BYTE_BANK_SIZE = 2
};
typedef enum CUsharedconfig_enum CUsharedconfig;


/* Shared-memory carveout presets. This enum includes a negative value:
 * DEFAULT is -1, MAX_L1 is 0 and MAX_SHARED is 100. */
enum CUshared_carveout_enum {
    CU_SHAREDMEM_CARVEOUT_DEFAULT    = -1,
    CU_SHAREDMEM_CARVEOUT_MAX_L1     = 0,
    CU_SHAREDMEM_CARVEOUT_MAX_SHARED = 100
};
typedef enum CUshared_carveout_enum CUshared_carveout;


/* Memory type codes, numbered 1..4 (0 is not assigned). */
enum CUmemorytype_enum {
    CU_MEMORYTYPE_HOST    = 1,
    CU_MEMORYTYPE_DEVICE  = 2,
    CU_MEMORYTYPE_ARRAY   = 3,
    CU_MEMORYTYPE_UNIFIED = 4
};
typedef enum CUmemorytype_enum CUmemorytype;


/* Compute mode codes. Value 1 is not assigned to any enumerator here. */
enum CUcomputemode_enum {
    CU_COMPUTEMODE_DEFAULT           = 0,
    CU_COMPUTEMODE_PROHIBITED        = 2,
    CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3
};
typedef enum CUcomputemode_enum CUcomputemode;


/* Memory advice codes, numbered 1..6. Each SET_* code is immediately
 * followed by its UNSET_* counterpart. */
enum CUmem_advise_enum {
    CU_MEM_ADVISE_SET_READ_MOSTLY          = 1,
    CU_MEM_ADVISE_UNSET_READ_MOSTLY        = 2,
    CU_MEM_ADVISE_SET_PREFERRED_LOCATION   = 3,
    CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION = 4,
    CU_MEM_ADVISE_SET_ACCESSED_BY          = 5,
    CU_MEM_ADVISE_UNSET_ACCESSED_BY        = 6
};
typedef enum CUmem_advise_enum CUmem_advise;

/* Memory range attribute identifiers, numbered 1..4. */
enum CUmem_range_attribute_enum {
    CU_MEM_RANGE_ATTRIBUTE_READ_MOSTLY            = 1,
    CU_MEM_RANGE_ATTRIBUTE_PREFERRED_LOCATION     = 2,
    CU_MEM_RANGE_ATTRIBUTE_ACCESSED_BY            = 3,
    CU_MEM_RANGE_ATTRIBUTE_LAST_PREFETCH_LOCATION = 4
};
typedef enum CUmem_range_attribute_enum CUmem_range_attribute;


/* JIT option identifiers.
 * Only the first enumerator has an explicit value (0); every following one is
 * numbered implicitly, so the order of this list defines the values and must
 * not change. With 30 options listed, CU_JIT_NUM_OPTIONS evaluates to 30.
 * The `# N "file"` lines are preprocessor line markers and must stay in
 * place. */
typedef enum CUjit_option_enum
{


    CU_JIT_MAX_REGISTERS = 0,
# 1023 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_THREADS_PER_BLOCK,


    CU_JIT_WALL_TIME,
# 1040 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_INFO_LOG_BUFFER,
# 1049 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
# 1058 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_ERROR_LOG_BUFFER,
# 1067 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES,


    CU_JIT_OPTIMIZATION_LEVEL,


    CU_JIT_TARGET_FROM_CUCONTEXT,


    CU_JIT_TARGET,
# 1100 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_FALLBACK_STRATEGY,


    CU_JIT_GENERATE_DEBUG_INFO,


    CU_JIT_LOG_VERBOSE,


    CU_JIT_GENERATE_LINE_INFO,


    CU_JIT_CACHE_MODE,


    CU_JIT_NEW_SM3X_OPT,


    CU_JIT_FAST_COMPILE,
# 1155 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_GLOBAL_SYMBOL_NAMES,
# 1164 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_GLOBAL_SYMBOL_ADDRESSES,


    CU_JIT_GLOBAL_SYMBOL_COUNT,


    CU_JIT_LTO,
# 1189 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_FTZ,
# 1199 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_PREC_DIV,
# 1209 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_PREC_SQRT,
# 1218 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_FMA,
# 1236 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_REFERENCED_KERNEL_NAMES,


    CU_JIT_REFERENCED_KERNEL_COUNT,
# 1260 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_REFERENCED_VARIABLE_NAMES,


    CU_JIT_REFERENCED_VARIABLE_COUNT,
# 1279 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_JIT_OPTIMIZE_UNUSED_DEVICE_VARIABLES,


    /* Last entry: its implicit value equals the number of options above. */
    CU_JIT_NUM_OPTIONS

} CUjit_option;


/* JIT compile targets. Each enumerator's value equals the two-digit number
 * in its name (CU_TARGET_COMPUTE_NN == NN). */
enum CUjit_target_enum {
    CU_TARGET_COMPUTE_20 = 20,
    CU_TARGET_COMPUTE_21 = 21,
    CU_TARGET_COMPUTE_30 = 30,
    CU_TARGET_COMPUTE_32 = 32,
    CU_TARGET_COMPUTE_35 = 35,
    CU_TARGET_COMPUTE_37 = 37,
    CU_TARGET_COMPUTE_50 = 50,
    CU_TARGET_COMPUTE_52 = 52,
    CU_TARGET_COMPUTE_53 = 53,
    CU_TARGET_COMPUTE_60 = 60,
    CU_TARGET_COMPUTE_61 = 61,
    CU_TARGET_COMPUTE_62 = 62,
    CU_TARGET_COMPUTE_70 = 70,
    CU_TARGET_COMPUTE_72 = 72,
    CU_TARGET_COMPUTE_75 = 75,
    CU_TARGET_COMPUTE_80 = 80,
    CU_TARGET_COMPUTE_86 = 86,
    CU_TARGET_COMPUTE_87 = 87
};
typedef enum CUjit_target_enum CUjit_target;


/* JIT fallback strategy codes: PREFER_PTX is 0, PREFER_BINARY is 1. */
enum CUjit_fallback_enum {
    CU_PREFER_PTX    = 0,
    CU_PREFER_BINARY = 1
};
typedef enum CUjit_fallback_enum CUjit_fallback;


/* JIT cache-mode option codes, numbered 0..2. */
enum CUjit_cacheMode_enum {
    CU_JIT_CACHE_OPTION_NONE = 0,
    CU_JIT_CACHE_OPTION_CG   = 1,
    CU_JIT_CACHE_OPTION_CA   = 2
};
typedef enum CUjit_cacheMode_enum CUjit_cacheMode;


/* JIT input kinds, numbered 0..5. CU_JIT_NUM_INPUT_TYPES is the count of the
 * kinds listed before it (6). */
enum CUjitInputType_enum {
    CU_JIT_INPUT_CUBIN     = 0,
    CU_JIT_INPUT_PTX       = 1,
    CU_JIT_INPUT_FATBINARY = 2,
    CU_JIT_INPUT_OBJECT    = 3,
    CU_JIT_INPUT_LIBRARY   = 4,
    CU_JIT_INPUT_NVVM      = 5,
    CU_JIT_NUM_INPUT_TYPES = 6
};
typedef enum CUjitInputType_enum CUjitInputType;

/* Handle type: pointer to a struct that is referred to only by its tag here
 * (no body for CUlinkState_st is visible at this point). */
typedef struct CUlinkState_st *CUlinkState;


/* Graphics-resource registration flags: 0 for NONE, then one bit per option
 * (bits 0..3). */
enum CUgraphicsRegisterFlags_enum {
    CU_GRAPHICS_REGISTER_FLAGS_NONE           = 0,
    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY      = 1 << 0,
    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD  = 1 << 1,
    CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST   = 1 << 2,
    CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 1 << 3
};
typedef enum CUgraphicsRegisterFlags_enum CUgraphicsRegisterFlags;


/* Graphics-resource mapping flags: 0 for NONE, 1 for READ_ONLY, 2 for
 * WRITE_DISCARD. */
enum CUgraphicsMapResourceFlags_enum {
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE          = 0,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY     = 1,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 2
};
typedef enum CUgraphicsMapResourceFlags_enum CUgraphicsMapResourceFlags;


/* Cubemap face indices, numbered 0..5 in +X, -X, +Y, -Y, +Z, -Z order. */
enum CUarray_cubemap_face_enum {
    CU_CUBEMAP_FACE_POSITIVE_X = 0,
    CU_CUBEMAP_FACE_NEGATIVE_X = 1,
    CU_CUBEMAP_FACE_POSITIVE_Y = 2,
    CU_CUBEMAP_FACE_NEGATIVE_Y = 3,
    CU_CUBEMAP_FACE_POSITIVE_Z = 4,
    CU_CUBEMAP_FACE_NEGATIVE_Z = 5
};
typedef enum CUarray_cubemap_face_enum CUarray_cubemap_face;


/* Limit identifiers, numbered 0..6. CU_LIMIT_MAX follows the last real
 * limit and therefore equals 7. */
enum CUlimit_enum {
    CU_LIMIT_STACK_SIZE                       = 0,
    CU_LIMIT_PRINTF_FIFO_SIZE                 = 1,
    CU_LIMIT_MALLOC_HEAP_SIZE                 = 2,
    CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH           = 3,
    CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 4,
    CU_LIMIT_MAX_L2_FETCH_GRANULARITY         = 5,
    CU_LIMIT_PERSISTING_L2_CACHE_SIZE         = 6,
    CU_LIMIT_MAX                              = 7
};
typedef enum CUlimit_enum CUlimit;


/* Resource type codes, numbered 0..3. */
enum CUresourcetype_enum {
    CU_RESOURCE_TYPE_ARRAY           = 0,
    CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 1,
    CU_RESOURCE_TYPE_LINEAR          = 2,
    CU_RESOURCE_TYPE_PITCH2D         = 3
};
typedef enum CUresourcetype_enum CUresourcetype;
# 1459 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Pointer to a function taking one opaque `void *userData` argument and
 * returning nothing. */
typedef void ( *CUhostFn)(void *userData);


/* Access property codes, numbered 0..2. */
enum CUaccessProperty_enum {
    CU_ACCESS_PROPERTY_NORMAL     = 0,
    CU_ACCESS_PROPERTY_STREAMING  = 1,
    CU_ACCESS_PROPERTY_PERSISTING = 2
};
typedef enum CUaccessProperty_enum CUaccessProperty;
# 1482 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Access policy window: a base pointer plus a byte count, a float ratio, and
 * two CUaccessProperty values named for the "hit" and "miss" cases.
 * NOTE(review): presumably hitRatio is the fraction of the window that gets
 * hitProp, with the remainder getting missProp - confirm against the driver
 * API documentation.
 * CUaccessPolicyWindow is a plain alias of the _v1 name. */
typedef struct CUaccessPolicyWindow_st {
    void *base_ptr;
    size_t num_bytes;
    float hitRatio;
    CUaccessProperty hitProp;
    CUaccessProperty missProp;
} CUaccessPolicyWindow_v1;
typedef CUaccessPolicyWindow_v1 CUaccessPolicyWindow;


/* Kernel node parameters: the function handle, grid and block extents in
 * X/Y/Z, a shared-memory byte count, and two pointer-to-pointer argument
 * fields (`kernelParams` and `extra`).
 * NOTE(review): how `kernelParams` and `extra` relate to each other is not
 * visible here - confirm against the driver API documentation.
 * CUDA_KERNEL_NODE_PARAMS is a plain alias of the _v1 name. */
typedef struct CUDA_KERNEL_NODE_PARAMS_st {
    CUfunction func;
    unsigned int gridDimX;
    unsigned int gridDimY;
    unsigned int gridDimZ;
    unsigned int blockDimX;
    unsigned int blockDimY;
    unsigned int blockDimZ;
    unsigned int sharedMemBytes;
    void **kernelParams;
    void **extra;
} CUDA_KERNEL_NODE_PARAMS_v1;
typedef CUDA_KERNEL_NODE_PARAMS_v1 CUDA_KERNEL_NODE_PARAMS;


/* Memset node parameters: destination device pointer, pitch, the fill value
 * and element size, and a width/height extent.
 * NOTE(review): the units of `pitch`, `width` and `elementSize` are not
 * stated here - confirm against the driver API documentation.
 * CUDA_MEMSET_NODE_PARAMS is a plain alias of the _v1 name. */
typedef struct CUDA_MEMSET_NODE_PARAMS_st {
    CUdeviceptr dst;
    size_t pitch;
    unsigned int value;
    unsigned int elementSize;
    size_t width;
    size_t height;
} CUDA_MEMSET_NODE_PARAMS_v1;
typedef CUDA_MEMSET_NODE_PARAMS_v1 CUDA_MEMSET_NODE_PARAMS;


/* Host node parameters: a CUhostFn callback paired with the opaque pointer
 * whose type matches that callback's single argument.
 * CUDA_HOST_NODE_PARAMS is a plain alias of the _v1 name. */
typedef struct CUDA_HOST_NODE_PARAMS_st {
    CUhostFn fn;
    void* userData;
} CUDA_HOST_NODE_PARAMS_v1;
typedef CUDA_HOST_NODE_PARAMS_v1 CUDA_HOST_NODE_PARAMS;


/* Graph node type codes, numbered contiguously from 0 to 12. */
enum CUgraphNodeType_enum {
    CU_GRAPH_NODE_TYPE_KERNEL           = 0,
    CU_GRAPH_NODE_TYPE_MEMCPY           = 1,
    CU_GRAPH_NODE_TYPE_MEMSET           = 2,
    CU_GRAPH_NODE_TYPE_HOST             = 3,
    CU_GRAPH_NODE_TYPE_GRAPH            = 4,
    CU_GRAPH_NODE_TYPE_EMPTY            = 5,
    CU_GRAPH_NODE_TYPE_WAIT_EVENT       = 6,
    CU_GRAPH_NODE_TYPE_EVENT_RECORD     = 7,
    CU_GRAPH_NODE_TYPE_EXT_SEMAS_SIGNAL = 8,
    CU_GRAPH_NODE_TYPE_EXT_SEMAS_WAIT   = 9,
    CU_GRAPH_NODE_TYPE_MEM_ALLOC        = 10,
    CU_GRAPH_NODE_TYPE_MEM_FREE         = 11,
    CU_GRAPH_NODE_TYPE_BATCH_MEM_OP     = 12
};
typedef enum CUgraphNodeType_enum CUgraphNodeType;
# 1578 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Synchronization policy codes, numbered 1..4 (0 is not assigned). */
enum CUsynchronizationPolicy_enum {
    CU_SYNC_POLICY_AUTO          = 1,
    CU_SYNC_POLICY_SPIN          = 2,
    CU_SYNC_POLICY_YIELD         = 3,
    CU_SYNC_POLICY_BLOCKING_SYNC = 4
};
typedef enum CUsynchronizationPolicy_enum CUsynchronizationPolicy;
# 1690 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Kernel node attribute identifiers. The values are sparse: 1, 2 and 8. */
enum CUkernelNodeAttrID_enum {
    CU_KERNEL_NODE_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1,
    CU_KERNEL_NODE_ATTRIBUTE_COOPERATIVE          = 2,
    CU_KERNEL_NODE_ATTRIBUTE_PRIORITY             = 8
};
typedef enum CUkernelNodeAttrID_enum CUkernelNodeAttrID;
# 1710 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Value storage for a kernel node attribute. The members are alternatives
 * sharing one storage location; their names line up with the three
 * CUkernelNodeAttrID identifiers (access policy window, cooperative,
 * priority).
 * CUkernelNodeAttrValue is a plain alias of the _v1 name. */
typedef union CUkernelNodeAttrValue_union {
    CUaccessPolicyWindow accessPolicyWindow;
    int cooperative;

    int priority;

} CUkernelNodeAttrValue_v1;


typedef CUkernelNodeAttrValue_v1 CUkernelNodeAttrValue;


/* Stream capture status codes, numbered 0..2. */
enum CUstreamCaptureStatus_enum {
    CU_STREAM_CAPTURE_STATUS_NONE        = 0,
    CU_STREAM_CAPTURE_STATUS_ACTIVE      = 1,
    CU_STREAM_CAPTURE_STATUS_INVALIDATED = 2
};
typedef enum CUstreamCaptureStatus_enum CUstreamCaptureStatus;


/* Stream capture mode codes, numbered 0..2. */
enum CUstreamCaptureMode_enum {
    CU_STREAM_CAPTURE_MODE_GLOBAL       = 0,
    CU_STREAM_CAPTURE_MODE_THREAD_LOCAL = 1,
    CU_STREAM_CAPTURE_MODE_RELAXED      = 2
};
typedef enum CUstreamCaptureMode_enum CUstreamCaptureMode;


/* Stream attribute identifiers. Value 2 is not assigned here. */
enum CUstreamAttrID_enum {
    CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW   = 1,
    CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3
};
typedef enum CUstreamAttrID_enum CUstreamAttrID;
# 1760 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Value storage for a stream attribute. The two members are alternatives
 * sharing one storage location; their names line up with the two
 * CUstreamAttrID identifiers.
 * CUstreamAttrValue is a plain alias of the _v1 name. */
typedef union CUstreamAttrValue_union {
    CUaccessPolicyWindow accessPolicyWindow;
    CUsynchronizationPolicy syncPolicy;
} CUstreamAttrValue_v1;


typedef CUstreamAttrValue_v1 CUstreamAttrValue;


/* Flags for driver entry-point lookup (per the enum name): 0 for the default,
 * bit 0 for LEGACY_STREAM, bit 1 for PER_THREAD_DEFAULT_STREAM. */
enum CUdriverProcAddress_flags_enum {
    CU_GET_PROC_ADDRESS_DEFAULT                   = 0x0,
    CU_GET_PROC_ADDRESS_LEGACY_STREAM             = 0x1,
    CU_GET_PROC_ADDRESS_PER_THREAD_DEFAULT_STREAM = 0x2
};
typedef enum CUdriverProcAddress_flags_enum CUdriverProcAddress_flags;


/* Execution affinity kinds. Only one kind (SM_COUNT, value 0) is listed, so
 * CU_EXEC_AFFINITY_TYPE_MAX equals 1. */
enum CUexecAffinityType_enum {
    CU_EXEC_AFFINITY_TYPE_SM_COUNT = 0,
    CU_EXEC_AFFINITY_TYPE_MAX      = 1
};
typedef enum CUexecAffinityType_enum CUexecAffinityType;


/* Wrapper around a single unsigned count.
 * CUexecAffinitySmCount is a plain alias of the _v1 name. */
typedef struct CUexecAffinitySmCount_st {
    unsigned int val;
} CUexecAffinitySmCount_v1;
typedef CUexecAffinitySmCount_v1 CUexecAffinitySmCount;


/* Execution affinity parameter: a CUexecAffinityType tag followed by a union
 * of per-type payloads. The union currently has one member, `smCount`.
 * CUexecAffinityParam is a plain alias of the _v1 name. */
typedef struct CUexecAffinityParam_st {
    CUexecAffinityType type;
    union {
        CUexecAffinitySmCount smCount;
    } param;
} CUexecAffinityParam_v1;
typedef CUexecAffinityParam_v1 CUexecAffinityParam;


/* Driver API result codes. The enum tag is `cudaError_enum`, while the
 * typedef name is `CUresult`.
 * Every value is explicit. CUDA_SUCCESS is 0 and CUDA_ERROR_UNKNOWN is 999;
 * the codes in between are sparse, and the listing groups them by hundreds
 * (1xx, 2xx, 3xx, ... 9xx).
 * The `# N "file"` lines are preprocessor line markers and must stay in
 * place. */
typedef enum cudaError_enum {


    CUDA_SUCCESS = 0,


    CUDA_ERROR_INVALID_VALUE = 1,


    CUDA_ERROR_OUT_OF_MEMORY = 2,


    CUDA_ERROR_NOT_INITIALIZED = 3,


    CUDA_ERROR_DEINITIALIZED = 4,


    CUDA_ERROR_PROFILER_DISABLED = 5,


    CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6,


    CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,


    CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,


    CUDA_ERROR_STUB_LIBRARY = 34,


    CUDA_ERROR_DEVICE_UNAVAILABLE = 46,


    CUDA_ERROR_NO_DEVICE = 100,


    CUDA_ERROR_INVALID_DEVICE = 101,


    CUDA_ERROR_DEVICE_NOT_LICENSED = 102,


    CUDA_ERROR_INVALID_IMAGE = 200,
# 1914 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_INVALID_CONTEXT = 201,
# 1923 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,


    CUDA_ERROR_MAP_FAILED = 205,


    CUDA_ERROR_UNMAP_FAILED = 206,


    CUDA_ERROR_ARRAY_IS_MAPPED = 207,


    CUDA_ERROR_ALREADY_MAPPED = 208,


    CUDA_ERROR_NO_BINARY_FOR_GPU = 209,


    CUDA_ERROR_ALREADY_ACQUIRED = 210,


    CUDA_ERROR_NOT_MAPPED = 211,


    CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,


    CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,


    CUDA_ERROR_ECC_UNCORRECTABLE = 214,


    CUDA_ERROR_UNSUPPORTED_LIMIT = 215,


    CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,


    CUDA_ERROR_PEER_ACCESS_UNSUPPORTED = 217,


    CUDA_ERROR_INVALID_PTX = 218,


    CUDA_ERROR_INVALID_GRAPHICS_CONTEXT = 219,


    CUDA_ERROR_NVLINK_UNCORRECTABLE = 220,


    CUDA_ERROR_JIT_COMPILER_NOT_FOUND = 221,


    CUDA_ERROR_UNSUPPORTED_PTX_VERSION = 222,


    CUDA_ERROR_JIT_COMPILATION_DISABLED = 223,


    CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY = 224,


    CUDA_ERROR_INVALID_SOURCE = 300,


    CUDA_ERROR_FILE_NOT_FOUND = 301,


    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,


    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,


    CUDA_ERROR_OPERATING_SYSTEM = 304,


    CUDA_ERROR_INVALID_HANDLE = 400,


    CUDA_ERROR_ILLEGAL_STATE = 401,


    CUDA_ERROR_NOT_FOUND = 500,


    CUDA_ERROR_NOT_READY = 600,
# 2099 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_ILLEGAL_ADDRESS = 700,
# 2110 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,
# 2120 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_LAUNCH_TIMEOUT = 702,


    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,


    CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,


    CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705,


    CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708,


    CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,


    CUDA_ERROR_ASSERT = 710,


    CUDA_ERROR_TOO_MANY_PEERS = 711,


    CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,


    CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED = 713,
# 2189 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_HARDWARE_STACK_ERROR = 714,


    CUDA_ERROR_ILLEGAL_INSTRUCTION = 715,
# 2206 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_MISALIGNED_ADDRESS = 716,
# 2217 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_INVALID_ADDRESS_SPACE = 717,


    CUDA_ERROR_INVALID_PC = 718,
# 2236 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_LAUNCH_FAILED = 719,
# 2245 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE = 720,


    CUDA_ERROR_NOT_PERMITTED = 800,


    CUDA_ERROR_NOT_SUPPORTED = 801,
# 2265 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_SYSTEM_NOT_READY = 802,


    CUDA_ERROR_SYSTEM_DRIVER_MISMATCH = 803,
# 2281 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,


    CUDA_ERROR_MPS_CONNECTION_FAILED = 805,


    CUDA_ERROR_MPS_RPC_FAILURE = 806,


    CUDA_ERROR_MPS_SERVER_NOT_READY = 807,


    CUDA_ERROR_MPS_MAX_CLIENTS_REACHED = 808,


    CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED = 809,


    CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED = 900,


    CUDA_ERROR_STREAM_CAPTURE_INVALIDATED = 901,


    CUDA_ERROR_STREAM_CAPTURE_MERGE = 902,


    CUDA_ERROR_STREAM_CAPTURE_UNMATCHED = 903,


    CUDA_ERROR_STREAM_CAPTURE_UNJOINED = 904,


    CUDA_ERROR_STREAM_CAPTURE_ISOLATION = 905,


    CUDA_ERROR_STREAM_CAPTURE_IMPLICIT = 906,


    CUDA_ERROR_CAPTURED_EVENT = 907,


    CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD = 908,


    CUDA_ERROR_TIMEOUT = 909,


    CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE = 910,
# 2383 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_EXTERNAL_DEVICE = 911,
# 2395 "/usr/local/cuda-11.7/include/cuda.h" 3
    CUDA_ERROR_UNKNOWN = 999
} CUresult;


/* Peer-to-peer attribute identifiers, numbered 1..4. The last two names
 * share the value 4. */
enum CUdevice_P2PAttribute_enum {
    CU_DEVICE_P2P_ATTRIBUTE_PERFORMANCE_RANK            = 1,
    CU_DEVICE_P2P_ATTRIBUTE_ACCESS_SUPPORTED            = 2,
    CU_DEVICE_P2P_ATTRIBUTE_NATIVE_ATOMIC_SUPPORTED     = 3,
    CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED     = 4,
    CU_DEVICE_P2P_ATTRIBUTE_CUDA_ARRAY_ACCESS_SUPPORTED = CU_DEVICE_P2P_ATTRIBUTE_ACCESS_ACCESS_SUPPORTED
};
typedef enum CUdevice_P2PAttribute_enum CUdevice_P2PAttribute;


/* Pointer to a function that receives a stream handle, a CUresult status and
 * an opaque `void *userData`, and returns nothing. */
typedef void ( *CUstreamCallback)(CUstream hStream, CUresult status, void *userData);


/* Pointer to a function mapping an `int blockSize` to a `size_t`.
 * NOTE(review): going by the name ("B2D size"), this presumably returns the
 * dynamic shared-memory size for a given block size - confirm against the
 * driver API documentation. */
typedef size_t ( *CUoccupancyB2DSize)(int blockSize);
# 2488 "/usr/local/cuda-11.7/include/cuda.h" 3
/* 2D memory copy descriptor. Fields come in three groups: a source
 * description (src*), a destination description (dst*) with the same shape,
 * and the copy extent (WidthInBytes x Height).
 * Each side carries a CUmemorytype plus three alternative locations: host
 * pointer, device pointer and array handle.
 * NOTE(review): presumably the memory-type field selects which of the three
 * locations is used - confirm against the driver API documentation.
 * CUDA_MEMCPY2D is a plain alias of the _v2 name. */
typedef struct CUDA_MEMCPY2D_st {
    size_t srcXInBytes;
    size_t srcY;

    CUmemorytype srcMemoryType;
    const void *srcHost;
    CUdeviceptr srcDevice;
    CUarray srcArray;
    size_t srcPitch;

    size_t dstXInBytes;
    size_t dstY;

    CUmemorytype dstMemoryType;
    void *dstHost;
    CUdeviceptr dstDevice;
    CUarray dstArray;
    size_t dstPitch;

    size_t WidthInBytes;
    size_t Height;
} CUDA_MEMCPY2D_v2;
typedef CUDA_MEMCPY2D_v2 CUDA_MEMCPY2D;


/* 3D memory copy descriptor. Fields come in three groups: a source
 * description (src*), a destination description (dst*) with the same shape,
 * and the copy extent (WidthInBytes x Height x Depth).
 * Each side carries an offset (X in bytes, Y, Z, LOD), a CUmemorytype, three
 * alternative locations (host pointer, device pointer, array handle), a
 * reserved pointer slot, and pitch/height values.
 * CUDA_MEMCPY3D is a plain alias of the _v2 name. */
typedef struct CUDA_MEMCPY3D_st {
    size_t srcXInBytes;
    size_t srcY;
    size_t srcZ;
    size_t srcLOD;
    CUmemorytype srcMemoryType;
    const void *srcHost;
    CUdeviceptr srcDevice;
    CUarray srcArray;
    /* Reserved slot; CUDA_MEMCPY3D_PEER_st has a CUcontext at this position. */
    void *reserved0;
    size_t srcPitch;
    size_t srcHeight;

    size_t dstXInBytes;
    size_t dstY;
    size_t dstZ;
    size_t dstLOD;
    CUmemorytype dstMemoryType;
    void *dstHost;
    CUdeviceptr dstDevice;
    CUarray dstArray;
    /* Reserved slot; CUDA_MEMCPY3D_PEER_st has a CUcontext at this position. */
    void *reserved1;
    size_t dstPitch;
    size_t dstHeight;

    size_t WidthInBytes;
    size_t Height;
    size_t Depth;
} CUDA_MEMCPY3D_v2;
typedef CUDA_MEMCPY3D_v2 CUDA_MEMCPY3D;


/* 3D memory copy descriptor with a per-side context. It has the same member
 * sequence as CUDA_MEMCPY3D_st, except that the two reserved pointer slots
 * are replaced by `srcContext` and `dstContext` handles.
 * CUDA_MEMCPY3D_PEER is a plain alias of the _v1 name. */
typedef struct CUDA_MEMCPY3D_PEER_st {
    size_t srcXInBytes;
    size_t srcY;
    size_t srcZ;
    size_t srcLOD;
    CUmemorytype srcMemoryType;
    const void *srcHost;
    CUdeviceptr srcDevice;
    CUarray srcArray;
    CUcontext srcContext;
    size_t srcPitch;
    size_t srcHeight;

    size_t dstXInBytes;
    size_t dstY;
    size_t dstZ;
    size_t dstLOD;
    CUmemorytype dstMemoryType;
    void *dstHost;
    CUdeviceptr dstDevice;
    CUarray dstArray;
    CUcontext dstContext;
    size_t dstPitch;
    size_t dstHeight;

    size_t WidthInBytes;
    size_t Height;
    size_t Depth;
} CUDA_MEMCPY3D_PEER_v1;
typedef CUDA_MEMCPY3D_PEER_v1 CUDA_MEMCPY3D_PEER;


/* 2D array descriptor: width and height, an element format code
 * (CUarray_format) and a channel count.
 * CUDA_ARRAY_DESCRIPTOR is a plain alias of the _v2 name. */
typedef struct CUDA_ARRAY_DESCRIPTOR_st
{
    size_t Width;
    size_t Height;

    CUarray_format Format;
    unsigned int NumChannels;
} CUDA_ARRAY_DESCRIPTOR_v2;
typedef CUDA_ARRAY_DESCRIPTOR_v2 CUDA_ARRAY_DESCRIPTOR;


/* 3D array descriptor: the 2D descriptor's fields plus a Depth extent and a
 * Flags word.
 * CUDA_ARRAY3D_DESCRIPTOR is a plain alias of the _v2 name. */
typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
{
    size_t Width;
    size_t Height;
    size_t Depth;

    CUarray_format Format;
    unsigned int NumChannels;
    unsigned int Flags;
} CUDA_ARRAY3D_DESCRIPTOR_v2;
typedef CUDA_ARRAY3D_DESCRIPTOR_v2 CUDA_ARRAY3D_DESCRIPTOR;
# 2616 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Sparse array properties: a tile extent (width/height/depth), the first mip
 * tail level, the mip tail size, a flags word and four reserved words.
 * CUDA_ARRAY_SPARSE_PROPERTIES is a plain alias of the _v1 name. */
typedef struct CUDA_ARRAY_SPARSE_PROPERTIES_st {
    /* Unnamed struct type used only for this member. */
    struct {
        unsigned int width;
        unsigned int height;
        unsigned int depth;
    } tileExtent;


    unsigned int miptailFirstLevel;


    unsigned long long miptailSize;


    unsigned int flags;
    unsigned int reserved[4];
} CUDA_ARRAY_SPARSE_PROPERTIES_v1;
typedef CUDA_ARRAY_SPARSE_PROPERTIES_v1 CUDA_ARRAY_SPARSE_PROPERTIES;


/* Array memory requirements: a size and an alignment, followed by four
 * reserved words.
 * CUDA_ARRAY_MEMORY_REQUIREMENTS is a plain alias of the _v1 name. */
typedef struct CUDA_ARRAY_MEMORY_REQUIREMENTS_st {
    size_t size;
    size_t alignment;
    unsigned int reserved[4];
} CUDA_ARRAY_MEMORY_REQUIREMENTS_v1;
typedef CUDA_ARRAY_MEMORY_REQUIREMENTS_v1 CUDA_ARRAY_MEMORY_REQUIREMENTS;


/* Resource descriptor: a CUresourcetype tag, a union `res` of per-type
 * payloads, and a flags word.
 * The union members `array`, `mipmap`, `linear` and `pitch2D` line up by name
 * with the four CUresourcetype codes; the fifth member, `reserved`, is a
 * 32-int block.
 * NOTE(review): presumably `reserved` fixes the union's size - confirm
 * against the driver API documentation.
 * CUDA_RESOURCE_DESC is a plain alias of the _v1 name. */
typedef struct CUDA_RESOURCE_DESC_st
{
    CUresourcetype resType;

    union {
        struct {
            CUarray hArray;
        } array;
        struct {
            CUmipmappedArray hMipmappedArray;
        } mipmap;
        struct {
            CUdeviceptr devPtr;
            CUarray_format format;
            unsigned int numChannels;
            size_t sizeInBytes;
        } linear;
        struct {
            CUdeviceptr devPtr;
            CUarray_format format;
            unsigned int numChannels;
            size_t width;
            size_t height;
            size_t pitchInBytes;
        } pitch2D;
        struct {
            int reserved[32];
        } reserved;
    } res;

    unsigned int flags;
} CUDA_RESOURCE_DESC_v1;
typedef CUDA_RESOURCE_DESC_v1 CUDA_RESOURCE_DESC;


/* Texture descriptor: three address modes, a filter mode, a flags word, a
 * maximum anisotropy, mipmap filter/bias/clamp settings, a four-component
 * border color and twelve reserved ints. The unversioned name below aliases
 * the _v1 struct. */
typedef struct CUDA_TEXTURE_DESC_st {
    CUaddress_mode addressMode[3];
    CUfilter_mode filterMode;
    unsigned int flags;
    unsigned int maxAnisotropy;
    CUfilter_mode mipmapFilterMode;
    float mipmapLevelBias;
    float minMipmapLevelClamp;
    float maxMipmapLevelClamp;
    float borderColor[4];
    int reserved[12];
} CUDA_TEXTURE_DESC_v1;
typedef CUDA_TEXTURE_DESC_v1 CUDA_TEXTURE_DESC;


/* Resource view format identifiers. The enumerators take the consecutive
 * values 0 through 34; every value is spelled out so that the numbering never
 * depends on declaration order. */
typedef enum CUresourceViewFormat_enum {
    CU_RES_VIEW_FORMAT_NONE          = 0,

    /* UINT_*X8 / SINT_*X8 entries */
    CU_RES_VIEW_FORMAT_UINT_1X8      = 1,
    CU_RES_VIEW_FORMAT_UINT_2X8      = 2,
    CU_RES_VIEW_FORMAT_UINT_4X8      = 3,
    CU_RES_VIEW_FORMAT_SINT_1X8      = 4,
    CU_RES_VIEW_FORMAT_SINT_2X8      = 5,
    CU_RES_VIEW_FORMAT_SINT_4X8      = 6,

    /* UINT_*X16 / SINT_*X16 entries */
    CU_RES_VIEW_FORMAT_UINT_1X16     = 7,
    CU_RES_VIEW_FORMAT_UINT_2X16     = 8,
    CU_RES_VIEW_FORMAT_UINT_4X16     = 9,
    CU_RES_VIEW_FORMAT_SINT_1X16     = 10,
    CU_RES_VIEW_FORMAT_SINT_2X16     = 11,
    CU_RES_VIEW_FORMAT_SINT_4X16     = 12,

    /* UINT_*X32 / SINT_*X32 entries */
    CU_RES_VIEW_FORMAT_UINT_1X32     = 13,
    CU_RES_VIEW_FORMAT_UINT_2X32     = 14,
    CU_RES_VIEW_FORMAT_UINT_4X32     = 15,
    CU_RES_VIEW_FORMAT_SINT_1X32     = 16,
    CU_RES_VIEW_FORMAT_SINT_2X32     = 17,
    CU_RES_VIEW_FORMAT_SINT_4X32     = 18,

    /* FLOAT_*X16 / FLOAT_*X32 entries */
    CU_RES_VIEW_FORMAT_FLOAT_1X16    = 19,
    CU_RES_VIEW_FORMAT_FLOAT_2X16    = 20,
    CU_RES_VIEW_FORMAT_FLOAT_4X16    = 21,
    CU_RES_VIEW_FORMAT_FLOAT_1X32    = 22,
    CU_RES_VIEW_FORMAT_FLOAT_2X32    = 23,
    CU_RES_VIEW_FORMAT_FLOAT_4X32    = 24,

    /* BC1 .. BC7 entries */
    CU_RES_VIEW_FORMAT_UNSIGNED_BC1  = 25,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC2  = 26,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC3  = 27,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC4  = 28,
    CU_RES_VIEW_FORMAT_SIGNED_BC4    = 29,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC5  = 30,
    CU_RES_VIEW_FORMAT_SIGNED_BC5    = 31,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC6H = 32,
    CU_RES_VIEW_FORMAT_SIGNED_BC6H   = 33,
    CU_RES_VIEW_FORMAT_UNSIGNED_BC7  = 34
} CUresourceViewFormat;


/* Resource view descriptor: a view format, three extents, first/last mipmap
 * level, first/last layer, and sixteen reserved words. The unversioned name
 * below aliases the _v1 struct. */
typedef struct CUDA_RESOURCE_VIEW_DESC_st
{
    CUresourceViewFormat format;
    size_t width;
    size_t height;
    size_t depth;
    unsigned int firstMipmapLevel;
    unsigned int lastMipmapLevel;
    unsigned int firstLayer;
    unsigned int lastLayer;
    unsigned int reserved[16];
} CUDA_RESOURCE_VIEW_DESC_v1;
typedef CUDA_RESOURCE_VIEW_DESC_v1 CUDA_RESOURCE_VIEW_DESC;


/* Pair of tokens: a 64-bit `p2pToken` and a 32-bit `vaSpaceToken`. The
 * unversioned name below aliases the _v1 struct. */
typedef struct CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_st {
    unsigned long long p2pToken;
    unsigned int vaSpaceToken;
} CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1;
typedef CUDA_POINTER_ATTRIBUTE_P2P_TOKENS_v1 CUDA_POINTER_ATTRIBUTE_P2P_TOKENS;


/* Pointer access flag values. READWRITE (3) has both low bits set, so it
 * includes the READ bit (1). */
typedef enum CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS_enum {
    CU_POINTER_ATTRIBUTE_ACCESS_FLAG_NONE      = 0,
    CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READ      = 1,
    CU_POINTER_ATTRIBUTE_ACCESS_FLAG_READWRITE = 3
} CUDA_POINTER_ATTRIBUTE_ACCESS_FLAGS;


/* Launch parameter bundle: a function handle, three grid dimensions, three
 * block dimensions, a shared-memory byte count, a stream handle and a pointer
 * to an array of kernel parameter pointers. The unversioned name below
 * aliases the _v1 struct. */
typedef struct CUDA_LAUNCH_PARAMS_st {
    CUfunction function;
    unsigned int gridDimX;
    unsigned int gridDimY;
    unsigned int gridDimZ;
    unsigned int blockDimX;
    unsigned int blockDimY;
    unsigned int blockDimZ;
    unsigned int sharedMemBytes;
    CUstream hStream;
    void **kernelParams;
} CUDA_LAUNCH_PARAMS_v1;
typedef CUDA_LAUNCH_PARAMS_v1 CUDA_LAUNCH_PARAMS;


/* External memory handle kinds. Values are consecutive from 1 to 8; no
 * enumerator has the value 0. */
typedef enum CUexternalMemoryHandleType_enum {
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD          = 0x1,
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32       = 0x2,
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT   = 0x3,
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP         = 0x4,
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE     = 0x5,
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE     = 0x6,
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 0x7,
    CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF           = 0x8
} CUexternalMemoryHandleType;
# 2877 "/usr/local/cuda-11.7/include/cuda.h" 3
/* External memory handle descriptor: a handle type tag, a `handle` union with
 * three alternative representations (an int fd, a win32 handle/name pair, or
 * an opaque nvSciBufObject pointer), a 64-bit size, a flags word and sixteen
 * reserved words.
 * NOTE(review): presumably `type` determines which `handle` member is
 * meaningful — confirm against the CUDA driver API documentation. */
typedef struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {


    CUexternalMemoryHandleType type;
    union {


        int fd;
# 2904 "/usr/local/cuda-11.7/include/cuda.h" 3
        struct {


            void *handle;


            const void *name;
        } win32;


        const void *nvSciBufObject;
    } handle;


    unsigned long long size;


    unsigned int flags;
    unsigned int reserved[16];
} CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1;
typedef CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1 CUDA_EXTERNAL_MEMORY_HANDLE_DESC;


/* External memory buffer descriptor: a 64-bit offset, a 64-bit size, a flags
 * word and sixteen reserved words. The unversioned name below aliases the
 * _v1 struct. */
typedef struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st {


    unsigned long long offset;


    unsigned long long size;


    unsigned int flags;
    unsigned int reserved[16];
} CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1;
typedef CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1 CUDA_EXTERNAL_MEMORY_BUFFER_DESC;


/* External memory mipmapped-array descriptor: a 64-bit offset, an embedded
 * CUDA_ARRAY3D_DESCRIPTOR (stored by value), a level count and sixteen
 * reserved words. The unversioned name below aliases the _v1 struct. */
typedef struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st {


    unsigned long long offset;


    CUDA_ARRAY3D_DESCRIPTOR arrayDesc;


    unsigned int numLevels;
    unsigned int reserved[16];
} CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1;
typedef CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1 CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC;


/* External semaphore handle kinds. Values are consecutive from 1 to 10; no
 * enumerator has the value 0. */
typedef enum CUexternalSemaphoreHandleType_enum {
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD                = 0x1,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32             = 0x2,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT         = 0x3,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE              = 0x4,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE              = 0x5,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC                = 0x6,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX        = 0x7,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT    = 0x8,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD    = 0x9,
    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 0xa
} CUexternalSemaphoreHandleType;


/* External semaphore handle descriptor: a handle type tag, a `handle` union
 * with three alternative representations (an int fd, a win32 handle/name
 * pair, or an opaque nvSciSyncObj pointer), a flags word and sixteen reserved
 * words.
 * NOTE(review): presumably `type` determines which `handle` member is
 * meaningful — confirm against the CUDA driver API documentation. */
typedef struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {


    CUexternalSemaphoreHandleType type;
    union {


        int fd;
# 3051 "/usr/local/cuda-11.7/include/cuda.h" 3
        struct {


            void *handle;


            const void *name;
        } win32;


        const void* nvSciSyncObj;
    } handle;


    unsigned int flags;
    unsigned int reserved[16];
} CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1;
typedef CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1 CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC;


/* External semaphore signal parameters. `params` is a struct (not a union),
 * so its sub-members — `fence`, `nvSciSync`, `keyedMutex` and twelve reserved
 * words — all occupy distinct storage. It is followed by a flags word and
 * sixteen further reserved words. */
typedef struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st {
    struct {


        struct {


            unsigned long long value;
        } fence;
        /* Either a pointer or a 64-bit reserved value sharing the same storage. */
        union {


            void *fence;
            unsigned long long reserved;
        } nvSciSync;


        struct {


            unsigned long long key;
        } keyedMutex;
        unsigned int reserved[12];
    } params;
# 3118 "/usr/local/cuda-11.7/include/cuda.h" 3
    unsigned int flags;
    unsigned int reserved[16];
} CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1;
typedef CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1 CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS;


/* External semaphore wait parameters. `params` is a struct (not a union), so
 * its sub-members — `fence`, `nvSciSync`, `keyedMutex` and ten reserved words
 * — all occupy distinct storage. It is followed by a flags word and sixteen
 * further reserved words. */
typedef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st {
    struct {


        struct {


            unsigned long long value;
        } fence;


        /* Either a pointer or a 64-bit reserved value sharing the same storage. */
        union {
            void *fence;
            unsigned long long reserved;
        } nvSciSync;


        /* Carries a key plus a timeout; the `Ms` suffix suggests milliseconds
         * — TODO confirm. */
        struct {


            unsigned long long key;


            unsigned int timeoutMs;
        } keyedMutex;
        unsigned int reserved[10];
    } params;
# 3169 "/usr/local/cuda-11.7/include/cuda.h" 3
    unsigned int flags;
    unsigned int reserved[16];
} CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1;
typedef CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1 CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS;


/* Signal node parameters: a pointer to external semaphore handles, a pointer
 * to read-only signal parameter records, and a count.
 * NOTE(review): presumably both arrays hold `numExtSems` entries — confirm. */
typedef struct CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_st {
    CUexternalSemaphore* extSemArray;
    const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS* paramsArray;
    unsigned int numExtSems;
} CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1;
typedef CUDA_EXT_SEM_SIGNAL_NODE_PARAMS_v1 CUDA_EXT_SEM_SIGNAL_NODE_PARAMS;


/* Wait node parameters: a pointer to external semaphore handles, a pointer to
 * read-only wait parameter records, and a count.
 * NOTE(review): presumably both arrays hold `numExtSems` entries — confirm. */
typedef struct CUDA_EXT_SEM_WAIT_NODE_PARAMS_st {
    CUexternalSemaphore* extSemArray;
    const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS* paramsArray;
    unsigned int numExtSems;
} CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1;
typedef CUDA_EXT_SEM_WAIT_NODE_PARAMS_v1 CUDA_EXT_SEM_WAIT_NODE_PARAMS;

/* Generic allocation handle: an unsigned 64-bit integer type. The unversioned
 * name aliases the _v1 typedef. */
typedef unsigned long long CUmemGenericAllocationHandle_v1;
typedef CUmemGenericAllocationHandle_v1 CUmemGenericAllocationHandle;


/* Allocation handle type values. The three non-zero kinds are distinct single
 * bits (1, 2, 4); the final enumerator is the largest positive 32-bit signed
 * value. */
typedef enum CUmemAllocationHandleType_enum {
    CU_MEM_HANDLE_TYPE_NONE                  = 0,
    CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 1,
    CU_MEM_HANDLE_TYPE_WIN32                 = 2,
    CU_MEM_HANDLE_TYPE_WIN32_KMT             = 4,
    CU_MEM_HANDLE_TYPE_MAX                   = 2147483647
} CUmemAllocationHandleType;


/* Memory access protection values. READWRITE (3) has both low bits set, so it
 * includes the READ bit (1); the final enumerator is the largest positive
 * 32-bit signed value. */
typedef enum CUmemAccess_flags_enum {
    CU_MEM_ACCESS_FLAGS_PROT_NONE      = 0,
    CU_MEM_ACCESS_FLAGS_PROT_READ      = 1,
    CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 3,
    CU_MEM_ACCESS_FLAGS_PROT_MAX       = 2147483647
} CUmemAccess_flags;


/* Memory location kinds: INVALID (0), DEVICE (1), and a final enumerator equal
 * to the largest positive 32-bit signed value. */
typedef enum CUmemLocationType_enum {
    CU_MEM_LOCATION_TYPE_INVALID = 0,
    CU_MEM_LOCATION_TYPE_DEVICE  = 1,
    CU_MEM_LOCATION_TYPE_MAX     = 2147483647
} CUmemLocationType;


/* Memory allocation kinds: INVALID (0), PINNED (1), and a final enumerator
 * equal to the largest positive 32-bit signed value. */
typedef enum CUmemAllocationType_enum {
    CU_MEM_ALLOCATION_TYPE_INVALID = 0,
    CU_MEM_ALLOCATION_TYPE_PINNED  = 1,
    CU_MEM_ALLOCATION_TYPE_MAX     = 2147483647
} CUmemAllocationType;


/* Allocation granularity selector: MINIMUM (0) or RECOMMENDED (1). */
typedef enum CUmemAllocationGranularity_flags_enum {
    CU_MEM_ALLOC_GRANULARITY_MINIMUM     = 0,
    CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 1
} CUmemAllocationGranularity_flags;


/* Memory range handle kinds: DMA_BUF_FD (1), and a final enumerator equal to
 * the largest positive 32-bit signed value. */
typedef enum CUmemRangeHandleType_enum {
    CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD = 1,
    CU_MEM_RANGE_HANDLE_TYPE_MAX        = 2147483647
} CUmemRangeHandleType;


/* Sparse subresource kinds: SPARSE_LEVEL (0) or MIPTAIL (1). */
typedef enum CUarraySparseSubresourceType_enum {
    CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_SPARSE_LEVEL = 0x0,
    CU_ARRAY_SPARSE_SUBRESOURCE_TYPE_MIPTAIL      = 0x1
} CUarraySparseSubresourceType;


/* Memory operation kinds: MAP (1) or UNMAP (2); no enumerator has the value 0. */
typedef enum CUmemOperationType_enum {
    CU_MEM_OPERATION_TYPE_MAP   = 0x1,
    CU_MEM_OPERATION_TYPE_UNMAP = 0x2
} CUmemOperationType;


/* Memory handle kinds; GENERIC (0) is the only enumerator. */
typedef enum CUmemHandleType_enum {
    CU_MEM_HANDLE_TYPE_GENERIC = 0x0
} CUmemHandleType;


/* Array mapping record. It holds three tag/union pairs — resourceType with
 * `resource`, subresourceType with `subresource`, and memHandleType with
 * `memHandle` — plus an operation type, a 64-bit offset, a device bit mask,
 * a flags word and two reserved words.
 * NOTE(review): presumably each tag selects the active member of the union
 * paired with it — confirm against the CUDA driver API documentation. */
typedef struct CUarrayMapInfo_st {
    CUresourcetype resourceType;

    union {
        CUmipmappedArray mipmap;
        CUarray array;
    } resource;

    CUarraySparseSubresourceType subresourceType;

    union {
        /* Level/layer index plus a 3-component offset and a 3-component extent. */
        struct {
            unsigned int level;
            unsigned int layer;
            unsigned int offsetX;
            unsigned int offsetY;
            unsigned int offsetZ;
            unsigned int extentWidth;
            unsigned int extentHeight;
            unsigned int extentDepth;
        } sparseLevel;
        /* Layer index plus a 64-bit offset and size. */
        struct {
            unsigned int layer;
            unsigned long long offset;
            unsigned long long size;
        } miptail;
    } subresource;

    CUmemOperationType memOperationType;
    CUmemHandleType memHandleType;

    /* Single-member union: only the generic allocation handle is declared. */
    union {
        CUmemGenericAllocationHandle memHandle;
    } memHandle;

    unsigned long long offset;
    unsigned int deviceBitMask;
    unsigned int flags;
    unsigned int reserved[2];
} CUarrayMapInfo_v1;
typedef CUarrayMapInfo_v1 CUarrayMapInfo;


/* Memory location: a location type paired with an integer id. How `id` is
 * interpreted is not visible here — see the CUDA driver API documentation. */
typedef struct CUmemLocation_st {
    CUmemLocationType type;
    int id;
} CUmemLocation_v1;
typedef CUmemLocation_v1 CUmemLocation;


/* Allocation compression kinds: NONE (0) or GENERIC (1). */
typedef enum CUmemAllocationCompType_enum {
    CU_MEM_ALLOCATION_COMP_NONE    = 0,
    CU_MEM_ALLOCATION_COMP_GENERIC = 1
} CUmemAllocationCompType;
# 3352 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Allocation properties: an allocation type, the requested handle types, a
 * location, an opaque win32 metadata pointer, and a nested `allocFlags`
 * struct of byte/short-sized fields (8 bytes of members in total). */
typedef struct CUmemAllocationProp_st {

    CUmemAllocationType type;

    CUmemAllocationHandleType requestedHandleTypes;

    CUmemLocation location;


    void *win32HandleMetaData;
    struct {
# 3378 "/usr/local/cuda-11.7/include/cuda.h" 3
         /* Stored as a single byte; presumably holds a CUmemAllocationCompType
          * value — TODO confirm. */
         unsigned char compressionType;
         unsigned char gpuDirectRDMACapable;

         unsigned short usage;
         unsigned char reserved[4];
    } allocFlags;
} CUmemAllocationProp_v1;
typedef CUmemAllocationProp_v1 CUmemAllocationProp;


/* Access descriptor: pairs a memory location with access protection flags. */
typedef struct CUmemAccessDesc_st {
    CUmemLocation location;
    CUmemAccess_flags flags;
} CUmemAccessDesc_v1;
typedef CUmemAccessDesc_v1 CUmemAccessDesc;

/* Graph exec update result codes. SUCCESS is 0; the error codes take the
 * consecutive values 1 through 8. */
typedef enum CUgraphExecUpdateResult_enum {
    CU_GRAPH_EXEC_UPDATE_SUCCESS                           = 0,
    CU_GRAPH_EXEC_UPDATE_ERROR                             = 1,
    CU_GRAPH_EXEC_UPDATE_ERROR_TOPOLOGY_CHANGED            = 2,
    CU_GRAPH_EXEC_UPDATE_ERROR_NODE_TYPE_CHANGED           = 3,
    CU_GRAPH_EXEC_UPDATE_ERROR_FUNCTION_CHANGED            = 4,
    CU_GRAPH_EXEC_UPDATE_ERROR_PARAMETERS_CHANGED          = 5,
    CU_GRAPH_EXEC_UPDATE_ERROR_NOT_SUPPORTED               = 6,
    CU_GRAPH_EXEC_UPDATE_ERROR_UNSUPPORTED_FUNCTION_CHANGE = 7,
    CU_GRAPH_EXEC_UPDATE_ERROR_ATTRIBUTES_CHANGED          = 8
} CUgraphExecUpdateResult;


/* Memory pool attribute identifiers. Numbering starts at 1 and runs
 * consecutively to 8; every value is written out explicitly here. */
typedef enum CUmemPool_attribute_enum {
# 3420 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES   = 1,
    CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC         = 2,
    CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES = 3,
# 3445 "/usr/local/cuda-11.7/include/cuda.h" 3
    CU_MEMPOOL_ATTR_RELEASE_THRESHOLD                 = 4,
    CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT              = 5,
    CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH                 = 6,
    CU_MEMPOOL_ATTR_USED_MEM_CURRENT                  = 7,
    CU_MEMPOOL_ATTR_USED_MEM_HIGH                     = 8
} CUmemPool_attribute;


/* Memory pool properties: an allocation type, handle types, a location, an
 * opaque win32 security-attributes pointer and 64 reserved bytes. */
typedef struct CUmemPoolProps_st {
    CUmemAllocationType allocType;
    CUmemAllocationHandleType handleTypes;
    CUmemLocation location;


    void *win32SecurityAttributes;
    unsigned char reserved[64];
} CUmemPoolProps_v1;
typedef CUmemPoolProps_v1 CUmemPoolProps;


/* Opaque 64-byte blob; the struct exposes no named fields other than the
 * reserved byte array. */
typedef struct CUmemPoolPtrExportData_st {
    unsigned char reserved[64];
} CUmemPoolPtrExportData_v1;
typedef CUmemPoolPtrExportData_v1 CUmemPoolPtrExportData;


/* Memory allocation node parameters: embedded pool properties (by value), a
 * pointer to read-only access descriptors with a count, a byte size and a
 * device pointer. Unlike the neighbouring structs, this one is declared
 * directly under its unversioned name (no _v1 alias).
 * NOTE(review): presumably `accessDescs` holds `accessDescCount` entries and
 * `dptr` is an output — confirm against the CUDA driver API documentation. */
typedef struct CUDA_MEM_ALLOC_NODE_PARAMS_st {


    CUmemPoolProps poolProps;
    const CUmemAccessDesc *accessDescs;
    size_t accessDescCount;
    size_t bytesize;
    CUdeviceptr dptr;
} CUDA_MEM_ALLOC_NODE_PARAMS;

/* Graph memory attribute identifiers. Numbering starts at 0 and runs
 * consecutively to 3; every value is written out explicitly here. */
typedef enum CUgraphMem_attribute_enum {
    CU_GRAPH_MEM_ATTR_USED_MEM_CURRENT     = 0,
    CU_GRAPH_MEM_ATTR_USED_MEM_HIGH        = 1,
    CU_GRAPH_MEM_ATTR_RESERVED_MEM_CURRENT = 2,
    CU_GRAPH_MEM_ATTR_RESERVED_MEM_HIGH    = 3
} CUgraphMem_attribute;
# 3714 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Flush option bits: HOST is bit 0 and MEMOPS is bit 1, so the two values can
 * be OR-ed together without overlapping. */
typedef enum CUflushGPUDirectRDMAWritesOptions_enum {
    CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST   = 0x1,
    CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_MEMOPS = 0x2
} CUflushGPUDirectRDMAWritesOptions;


/* Writes-ordering levels: NONE (0), OWNER (100), ALL_DEVICES (200). The 100
 * and 200 values equal those of the TO_OWNER / TO_ALL_DEVICES enumerators in
 * CUflushGPUDirectRDMAWritesScope. */
typedef enum CUGPUDirectRDMAWritesOrdering_enum {
    CU_GPU_DIRECT_RDMA_WRITES_ORDERING_NONE        = 0x00,
    CU_GPU_DIRECT_RDMA_WRITES_ORDERING_OWNER       = 0x64,
    CU_GPU_DIRECT_RDMA_WRITES_ORDERING_ALL_DEVICES = 0xc8
} CUGPUDirectRDMAWritesOrdering;


/* Flush scope values: TO_OWNER (100) or TO_ALL_DEVICES (200). */
typedef enum CUflushGPUDirectRDMAWritesScope_enum {
    CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_OWNER       = 0x64,
    CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TO_ALL_DEVICES = 0xc8
} CUflushGPUDirectRDMAWritesScope;


/* Flush target values; CURRENT_CTX (0) is the only enumerator. */
typedef enum CUflushGPUDirectRDMAWritesTarget_enum {
    CU_FLUSH_GPU_DIRECT_RDMA_WRITES_TARGET_CURRENT_CTX = 0x0
} CUflushGPUDirectRDMAWritesTarget;


/* Graph debug-dot flag bits. Each enumerator is a distinct single bit, from
 * bit 0 (0x0001) through bit 13 (0x2000), so any combination can be OR-ed
 * together. */
typedef enum CUgraphDebugDot_flags_enum {
    CU_GRAPH_DEBUG_DOT_FLAGS_VERBOSE                      = 0x0001, /* bit 0  */
    CU_GRAPH_DEBUG_DOT_FLAGS_RUNTIME_TYPES                = 0x0002, /* bit 1  */
    CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_PARAMS           = 0x0004, /* bit 2  */
    CU_GRAPH_DEBUG_DOT_FLAGS_MEMCPY_NODE_PARAMS           = 0x0008, /* bit 3  */
    CU_GRAPH_DEBUG_DOT_FLAGS_MEMSET_NODE_PARAMS           = 0x0010, /* bit 4  */
    CU_GRAPH_DEBUG_DOT_FLAGS_HOST_NODE_PARAMS             = 0x0020, /* bit 5  */
    CU_GRAPH_DEBUG_DOT_FLAGS_EVENT_NODE_PARAMS            = 0x0040, /* bit 6  */
    CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_SIGNAL_NODE_PARAMS = 0x0080, /* bit 7  */
    CU_GRAPH_DEBUG_DOT_FLAGS_EXT_SEMAS_WAIT_NODE_PARAMS   = 0x0100, /* bit 8  */
    CU_GRAPH_DEBUG_DOT_FLAGS_KERNEL_NODE_ATTRIBUTES       = 0x0200, /* bit 9  */
    CU_GRAPH_DEBUG_DOT_FLAGS_HANDLES                      = 0x0400, /* bit 10 */
    CU_GRAPH_DEBUG_DOT_FLAGS_MEM_ALLOC_NODE_PARAMS        = 0x0800, /* bit 11 */
    CU_GRAPH_DEBUG_DOT_FLAGS_MEM_FREE_NODE_PARAMS         = 0x1000, /* bit 12 */
    CU_GRAPH_DEBUG_DOT_FLAGS_BATCH_MEM_OP_NODE_PARAMS     = 0x2000  /* bit 13 */
} CUgraphDebugDot_flags;


/* User object flags; NO_DESTRUCTOR_SYNC (1) is the only enumerator. */
typedef enum CUuserObject_flags_enum {
    CU_USER_OBJECT_NO_DESTRUCTOR_SYNC = 0x1
} CUuserObject_flags;


/* User object retain flags; MOVE (1) is the only enumerator. */
typedef enum CUuserObjectRetain_flags_enum {
    CU_GRAPH_USER_OBJECT_MOVE = 0x1
} CUuserObjectRetain_flags;


/* Graph instantiation flag bits: AUTO_FREE_ON_LAUNCH is bit 0 (1) and
 * USE_NODE_PRIORITY is bit 3 (8); bits 1 and 2 are not assigned here. */
typedef enum CUgraphInstantiate_flags_enum {
    CUDA_GRAPH_INSTANTIATE_FLAG_AUTO_FREE_ON_LAUNCH = 0x1,
    CUDA_GRAPH_INSTANTIATE_FLAG_USE_NODE_PRIORITY   = 0x8
} CUgraphInstantiate_flags;
# 3840 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Error lookup, initialisation and version query declarations. Every function
 * returns a CUresult status code. The two error lookups take a CUresult and a
 * `const char **`; presumably the string pointer is written through `pStr` —
 * confirm against the CUDA driver API documentation. */
CUresult cuGetErrorString(CUresult error, const char **pStr);
# 3861 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGetErrorName(CUresult error, const char **pStr);
# 3895 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuInit(unsigned int Flags);
# 3933 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDriverGetVersion(int *driverVersion);
# 3976 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Device enumeration and query declarations. Every function returns a
 * CUresult status code; non-const pointer parameters are presumably outputs
 * (confirm against the CUDA driver API documentation). Several names carry a
 * _v2 suffix, i.e. the versioned symbol is what is declared here. */
CUresult cuDeviceGet(CUdevice *device, int ordinal);
# 4005 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetCount(int *count);
# 4037 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetName(char *name, int len, CUdevice dev);
# 4070 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetUuid(CUuuid *uuid, CUdevice dev);
# 4099 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetUuid_v2(CUuuid *uuid, CUdevice dev);
# 4128 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetLuid(char *luid, unsigned int *deviceNodeMask, CUdevice dev);
# 4157 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceTotalMem_v2(size_t *bytes, CUdevice dev);
# 4188 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements, CUarray_format format, unsigned numChannels, CUdevice dev);
# 4413 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
# 4462 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetNvSciSyncAttributes(void *nvSciSyncAttrList, CUdevice dev, int flags);
# 4480 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool);
# 4496 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetMemPool(CUmemoryPool *pool, CUdevice dev);
# 4514 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetDefaultMemPool(CUmemoryPool *pool_out, CUdevice dev);
# 4546 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, CUflushGPUDirectRDMAWritesScope scope);
# 4625 "/usr/local/cuda-11.7/include/cuda.h" 3
/* The next two declarations carry __attribute__((deprecated)), so any use of
 * them produces a compiler deprecation diagnostic. */
__attribute__((deprecated)) CUresult cuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
# 4659 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
# 4726 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Primary-context declarations (retain / release / set flags / get state /
 * reset) plus the exec-affinity support query. Every function takes the
 * CUdevice it applies to and returns a CUresult status code. */
CUresult cuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev);
# 4767 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDevicePrimaryCtxRelease_v2(CUdevice dev);
# 4832 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDevicePrimaryCtxSetFlags_v2(CUdevice dev, unsigned int flags);
# 4858 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int *flags, int *active);
# 4898 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDevicePrimaryCtxReset_v2(CUdevice dev);
# 4931 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetExecAffinitySupport(int *pi, CUexecAffinityType type, CUdevice dev);
# 5040 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Context management declarations (cuCtx*). Every function returns a CUresult
 * status code. Functions without a CUcontext parameter (e.g. cuCtxSynchronize,
 * cuCtxSetLimit) presumably act on the calling thread's current context —
 * confirm against the CUDA driver API documentation. */
CUresult cuCtxCreate_v2(CUcontext *pctx, unsigned int flags, CUdevice dev);
# 5147 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxCreate_v3(CUcontext *pctx, CUexecAffinityParam *paramsArray, int numParams, unsigned int flags, CUdevice dev);
# 5192 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxDestroy_v2(CUcontext ctx);
# 5226 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxPushCurrent_v2(CUcontext ctx);
# 5260 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxPopCurrent_v2(CUcontext *pctx);
# 5290 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxSetCurrent(CUcontext ctx);
# 5313 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxGetCurrent(CUcontext *pctx);
# 5343 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxGetDevice(CUdevice *device);
# 5371 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxGetFlags(unsigned int *flags);
# 5401 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxSynchronize(void);
# 5500 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxSetLimit(CUlimit limit, size_t value);
# 5542 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxGetLimit(size_t *pvalue, CUlimit limit);
# 5586 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxGetCacheConfig(CUfunc_cache *pconfig);
# 5637 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxSetCacheConfig(CUfunc_cache config);
# 5679 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxGetSharedMemConfig(CUsharedconfig *pConfig);
# 5732 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxSetSharedMemConfig(CUsharedconfig config);
# 5770 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxGetApiVersion(CUcontext ctx, unsigned int *version);
# 5810 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxGetStreamPriorityRange(int *leastPriority, int *greatestPriority);
# 5826 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxResetPersistingL2Cache(void);
# 5850 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxGetExecAffinity(CUexecAffinityParam *pExecAffinity, CUexecAffinityType type);
# 5906 "/usr/local/cuda-11.7/include/cuda.h" 3
/* The next two declarations carry __attribute__((deprecated)), so any use of
 * them produces a compiler deprecation diagnostic. */
__attribute__((deprecated)) CUresult cuCtxAttach(CUcontext *pctx, unsigned int flags);
# 5942 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuCtxDetach(CUcontext ctx);
# 5998 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Module declarations: four load variants (from a file name, from an image
 * pointer, from an image with option/value arrays, from a fat binary), an
 * unload, and by-name lookups of a function, global, texture reference and
 * surface reference within a CUmodule. Every function returns a CUresult
 * status code. */
CUresult cuModuleLoad(CUmodule *module, const char *fname);
# 6036 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuModuleLoadData(CUmodule *module, const void *image);
# 6080 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
# 6123 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
# 6149 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuModuleUnload(CUmodule hmod);
# 6179 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
# 6214 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuModuleGetGlobal_v2(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
# 6248 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
# 6280 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
# 6320 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Linker declarations operating on a CUlinkState: create, add data, add file,
 * complete and destroy. Every function returns a CUresult status code; in
 * this group the return type sits on its own line above the function name.
 * The create/add functions take parallel `options` / `optionValues` arrays
 * with a `numOptions` count. */
CUresult
cuLinkCreate_v2(unsigned int numOptions, CUjit_option *options, void **optionValues, CUlinkState *stateOut);
# 6358 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult
cuLinkAddData_v2(CUlinkState state, CUjitInputType type, void *data, size_t size, const char *name,
    unsigned int numOptions, CUjit_option *options, void **optionValues);
# 6398 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult
cuLinkAddFile_v2(CUlinkState state, CUjitInputType type, const char *path,
    unsigned int numOptions, CUjit_option *options, void **optionValues);
# 6425 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult
cuLinkComplete(CUlinkState state, void **cubinOut, size_t *sizeOut);
# 6439 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult
cuLinkDestroy(CUlinkState state);
# 6498 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Memory declarations: info query, device / pitched / host / managed
 * allocation and free, address-range query, PCI bus id lookups, IPC event and
 * memory handle functions, and host memory register / unregister. Every
 * function returns a CUresult status code. Device addresses are passed as
 * CUdeviceptr values, host addresses as `void *`. */
CUresult cuMemGetInfo_v2(size_t *free, size_t *total);
# 6532 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemAlloc_v2(CUdeviceptr *dptr, size_t bytesize);
# 6594 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemAllocPitch_v2(CUdeviceptr *dptr, size_t *pPitch, size_t WidthInBytes, size_t Height, unsigned int ElementSizeBytes);
# 6630 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemFree_v2(CUdeviceptr dptr);
# 6664 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemGetAddressRange_v2(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr);
# 6711 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemAllocHost_v2(void **pp, size_t bytesize);
# 6741 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemFreeHost(void *p);
# 6820 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags);
# 6873 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemHostGetDevicePointer_v2(CUdeviceptr *pdptr, void *p, unsigned int Flags);
# 6900 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemHostGetFlags(unsigned int *pFlags, void *p);
# 7010 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, unsigned int flags);
# 7039 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetByPCIBusId(CUdevice *dev, const char *pciBusId);
# 7071 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev);
# 7116 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuIpcGetEventHandle(CUipcEventHandle *pHandle, CUevent event);
# 7156 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuIpcOpenEventHandle(CUevent *phEvent, CUipcEventHandle handle);
# 7197 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuIpcGetMemHandle(CUipcMemHandle *pHandle, CUdeviceptr dptr);
# 7255 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuIpcOpenMemHandle_v2(CUdeviceptr *pdptr, CUipcMemHandle handle, unsigned int Flags);
# 7289 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuIpcCloseMemHandle(CUdeviceptr dptr);
# 7376 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemHostRegister_v2(void *p, size_t bytesize, unsigned int Flags);
# 7402 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemHostUnregister(void *p);
# 7442 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Memory copy declarations that take no stream parameter. Every function
 * returns a CUresult status code. The name suffix encodes the endpoints in
 * destination-last reading order "XtoY" with H = host pointer (`void *`),
 * D = CUdeviceptr and A = CUarray, as the parameter types show. The 2D/3D
 * variants take a pointer to a read-only descriptor struct instead of
 * individual arguments. */
CUresult cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
# 7472 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
# 7508 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyHtoD_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
# 7544 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyDtoH_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
# 7580 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyDtoD_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
# 7616 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyDtoA_v2(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
# 7654 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyAtoD_v2(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
# 7691 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyHtoA_v2(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
# 7728 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyAtoH_v2(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
# 7768 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyAtoA_v2(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
# 7932 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpy2D_v2(const CUDA_MEMCPY2D *pCopy);
# 8094 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpy2DUnaligned_v2(const CUDA_MEMCPY2D *pCopy);
# 8263 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpy3D_v2(const CUDA_MEMCPY3D *pCopy);
# 8287 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy);
# 8332 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Memory copy declarations with an "Async" name and a trailing CUstream
 * parameter; otherwise their parameter lists match the stream-less copy
 * functions of the same base name. Every function returns a CUresult status
 * code.
 * NOTE(review): presumably the copy is enqueued on `hStream` and may not have
 * completed when the call returns — confirm against the CUDA driver API
 * documentation. */
CUresult cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount, CUstream hStream);
# 8365 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount, CUstream hStream);
# 8406 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyHtoDAsync_v2(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount, CUstream hStream);
# 8447 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyDtoHAsync_v2(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
# 8488 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyDtoDAsync_v2(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
# 8530 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyHtoAAsync_v2(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount, CUstream hStream);
# 8572 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpyAtoHAsync_v2(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount, CUstream hStream);
# 8741 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpy2DAsync_v2(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
# 8915 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpy3DAsync_v2(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
# 8941 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, CUstream hStream);
# 8976 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Memset declarations. Every function returns a CUresult status code. The
 * D8 / D16 / D32 variants take the fill value as unsigned char / unsigned
 * short / unsigned int respectively. The D2D* variants add a destination
 * pitch plus Width and Height instead of a single count N, and the *Async
 * variants add a trailing CUstream.
 * NOTE(review): presumably N, Width and Height count elements of the fill
 * value's width rather than bytes — confirm against the CUDA driver API
 * documentation. */
CUresult cuMemsetD8_v2(CUdeviceptr dstDevice, unsigned char uc, size_t N);
# 9011 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemsetD16_v2(CUdeviceptr dstDevice, unsigned short us, size_t N);
# 9046 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemsetD32_v2(CUdeviceptr dstDevice, unsigned int ui, size_t N);
# 9086 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemsetD2D8_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height);
# 9127 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemsetD2D16_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height);
# 9168 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemsetD2D32_v2(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height);
# 9205 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, size_t N, CUstream hStream);
# 9242 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, size_t N, CUstream hStream);
# 9278 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, size_t N, CUstream hStream);
# 9320 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned char uc, size_t Width, size_t Height, CUstream hStream);
# 9363 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned short us, size_t Width, size_t Height, CUstream hStream);
# 9406 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, unsigned int ui, size_t Width, size_t Height, CUstream hStream);
# 9510 "/usr/local/cuda-11.7/include/cuda.h" 3
/* Array and mipmapped-array declarations: creation from a read-only
 * descriptor, descriptor / sparse-property / memory-requirement queries,
 * plane and level accessors, and destruction. Every function returns a
 * CUresult status code. The create functions take the descriptor by
 * pointer-to-const and receive the new handle through their first
 * parameter's pointer type. */
CUresult cuArrayCreate_v2(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
# 9544 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuArrayGetDescriptor_v2(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
# 9568 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUarray array);
# 9593 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMipmappedArrayGetSparseProperties(CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUmipmappedArray mipmap);
# 9617 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements, CUarray array, CUdevice device);
# 9641 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMipmappedArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements, CUmipmappedArray mipmap, CUdevice device);
# 9675 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuArrayGetPlane(CUarray *pPlaneArray, CUarray hArray, unsigned int planeIdx);
# 9706 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuArrayDestroy(CUarray hArray);
# 9886 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuArray3DCreate_v2(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
# 9924 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuArray3DGetDescriptor_v2(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
# 10066 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMipmappedArrayCreate(CUmipmappedArray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, unsigned int numMipmapLevels);
# 10096 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMipmappedArrayGetLevel(CUarray *pLevelArray, CUmipmappedArray hMipmappedArray, unsigned int level);
# 10121 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray);
# 10162 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemAddressReserve(CUdeviceptr *ptr, size_t size, size_t alignment, CUdeviceptr addr, unsigned long long flags);
# 10183 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemAddressFree(CUdeviceptr ptr, size_t size);
# 10222 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size, const CUmemAllocationProp *prop, unsigned long long flags);
# 10249 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemRelease(CUmemGenericAllocationHandle handle);
# 10293 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, CUmemGenericAllocationHandle handle, unsigned long long flags);
# 10434 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemMapArrayAsync(CUarrayMapInfo *mapInfoList, unsigned int count, CUstream hStream);
# 10463 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemUnmap(CUdeviceptr ptr, size_t size);
# 10488 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemSetAccess(CUdeviceptr ptr, size_t size, const CUmemAccessDesc *desc, size_t count);
# 10507 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemGetAccess(unsigned long long *flags, const CUmemLocation *location, CUdeviceptr ptr);
# 10540 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemExportToShareableHandle(void *shareableHandle, CUmemGenericAllocationHandle handle, CUmemAllocationHandleType handleType, unsigned long long flags);
# 10567 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemImportFromShareableHandle(CUmemGenericAllocationHandle *handle, void *osHandle, CUmemAllocationHandleType shHandleType);
# 10589 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemGetAllocationGranularity(size_t *granularity, const CUmemAllocationProp *prop, CUmemAllocationGranularity_flags option);
# 10606 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemGetAllocationPropertiesFromHandle(CUmemAllocationProp *prop, CUmemGenericAllocationHandle handle);
# 10630 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr);
# 10687 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream);
# 10720 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemAllocAsync(CUdeviceptr *dptr, size_t bytesize, CUstream hStream);
# 10746 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep);
# 10789 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPoolSetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void *value);
# 10836 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPoolGetAttribute(CUmemoryPool pool, CUmemPool_attribute attr, void *value);
# 10853 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPoolSetAccess(CUmemoryPool pool, const CUmemAccessDesc *map, size_t count);
# 10867 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPoolGetAccess(CUmemAccess_flags *flags, CUmemoryPool memPool, CUmemLocation *location);
# 10889 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPoolCreate(CUmemoryPool *pool, const CUmemPoolProps *poolProps);
# 10912 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPoolDestroy(CUmemoryPool pool);
# 10950 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemAllocFromPoolAsync(CUdeviceptr *dptr, size_t bytesize, CUmemoryPool pool, CUstream hStream);
# 10979 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPoolExportToShareableHandle(void *handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, unsigned long long flags);
# 11003 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPoolImportFromShareableHandle(
        CUmemoryPool *pool_out,
        void *handle,
        CUmemAllocationHandleType handleType,
        unsigned long long flags);
# 11027 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPoolExportPointer(CUmemPoolPtrExportData *shareData_out, CUdeviceptr ptr);
# 11056 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPoolImportPointer(CUdeviceptr *ptr_out, CUmemoryPool pool, CUmemPoolPtrExportData *shareData);
# 11341 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuPointerGetAttribute(void *data, CUpointer_attribute attribute, CUdeviceptr ptr);
# 11409 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, CUdevice dstDevice, CUstream hStream);
# 11523 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemAdvise(CUdeviceptr devPtr, size_t count, CUmem_advise advice, CUdevice device);
# 11581 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemRangeGetAttribute(void *data, size_t dataSize, CUmem_range_attribute attribute, CUdeviceptr devPtr, size_t count);
# 11621 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemRangeGetAttributes(void **data, size_t *dataSizes, CUmem_range_attribute *attributes, size_t numAttributes, CUdeviceptr devPtr, size_t count);
# 11663 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuPointerSetAttribute(const void *value, CUpointer_attribute attribute, CUdeviceptr ptr);
# 11712 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuPointerGetAttributes(unsigned int numAttributes, CUpointer_attribute *attributes, void **data, CUdeviceptr ptr);
# 11763 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamCreate(CUstream *phStream, unsigned int Flags);
# 11812 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamCreateWithPriority(CUstream *phStream, unsigned int flags, int priority);
# 11843 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamGetPriority(CUstream hStream, int *priority);
# 11871 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamGetFlags(CUstream hStream, unsigned int *flags);
# 11915 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamGetCtx(CUstream hStream, CUcontext *pctx);
# 11952 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags);
# 12027 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamAddCallback(CUstream hStream, CUstreamCallback callback, void *userData, unsigned int flags);
# 12065 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamBeginCapture_v2(CUstream hStream, CUstreamCaptureMode mode);
# 12118 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode *mode);
# 12148 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamEndCapture(CUstream hStream, CUgraph *phGraph);
# 12188 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamIsCapturing(CUstream hStream, CUstreamCaptureStatus *captureStatus);
# 12216 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamGetCaptureInfo(CUstream hStream, CUstreamCaptureStatus *captureStatus_out, cuuint64_t *id_out);
# 12270 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamGetCaptureInfo_v2(CUstream hStream, CUstreamCaptureStatus *captureStatus_out,
        cuuint64_t *id_out, CUgraph *graph_out, const CUgraphNode **dependencies_out, size_t *numDependencies_out);
# 12303 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamUpdateCaptureDependencies(CUstream hStream, CUgraphNode *dependencies, size_t numDependencies, unsigned int flags);
# 12391 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, size_t length, unsigned int flags);
# 12421 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamQuery(CUstream hStream);
# 12450 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamSynchronize(CUstream hStream);
# 12480 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamDestroy_v2(CUstream hStream);
# 12500 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamCopyAttributes(CUstream dst, CUstream src);
# 12521 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr,
                                      CUstreamAttrValue *value_out);
# 12544 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamSetAttribute(CUstream hStream, CUstreamAttrID attr,
                                      const CUstreamAttrValue *value);
# 12601 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuEventCreate(CUevent *phEvent, unsigned int Flags);
# 12643 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuEventRecord(CUevent hEvent, CUstream hStream);
# 12692 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, unsigned int flags);
# 12724 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuEventQuery(CUevent hEvent);
# 12755 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuEventSynchronize(CUevent hEvent);
# 12784 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuEventDestroy_v2(CUevent hEvent);
# 12828 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
# 13006 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuImportExternalMemory(CUexternalMemory *extMem_out, const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc);
# 13060 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuExternalMemoryGetMappedBuffer(CUdeviceptr *devPtr, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc);
# 13116 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuExternalMemoryGetMappedMipmappedArray(CUmipmappedArray *mipmap, CUexternalMemory extMem, const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc);
# 13138 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDestroyExternalMemory(CUexternalMemory extMem);
# 13295 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuImportExternalSemaphore(CUexternalSemaphore *extSem_out, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc);
# 13362 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuSignalExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream);
# 13438 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuWaitExternalSemaphoresAsync(const CUexternalSemaphore *extSemArray, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, unsigned int numExtSems, CUstream stream);
# 13459 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDestroyExternalSemaphore(CUexternalSemaphore extSem);
# 13638 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
# 13683 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
# 13718 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
# 13752 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
# 13797 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamBatchMemOp(CUstream stream, unsigned int count, CUstreamBatchMemOpParams *paramArray, unsigned int flags);
# 13844 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamWaitValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
# 13889 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamWaitValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
# 13918 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamWriteValue32_v2(CUstream stream, CUdeviceptr addr, cuuint32_t value, unsigned int flags);
# 13949 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamWriteValue64_v2(CUstream stream, CUdeviceptr addr, cuuint64_t value, unsigned int flags);
# 13992 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuStreamBatchMemOp_v2(CUstream stream, unsigned int count, CUstreamBatchMemOpParams *paramArray, unsigned int flags);
# 14090 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc);
# 14155 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuFuncSetAttribute(CUfunction hfunc, CUfunction_attribute attrib, int value);
# 14199 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
# 14251 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuFuncSetSharedMemConfig(CUfunction hfunc, CUsharedconfig config);
# 14277 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuFuncGetModule(CUmodule *hmod, CUfunction hfunc);
# 14386 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuLaunchKernel(CUfunction f,
                                unsigned int gridDimX,
                                unsigned int gridDimY,
                                unsigned int gridDimZ,
                                unsigned int blockDimX,
                                unsigned int blockDimY,
                                unsigned int blockDimZ,
                                unsigned int sharedMemBytes,
                                CUstream hStream,
                                void **kernelParams,
                                void **extra);
# 14664 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuLaunchCooperativeKernel(CUfunction f,
                                unsigned int gridDimX,
                                unsigned int gridDimY,
                                unsigned int gridDimZ,
                                unsigned int blockDimX,
                                unsigned int blockDimY,
                                unsigned int blockDimZ,
                                unsigned int sharedMemBytes,
                                CUstream hStream,
                                void **kernelParams);
# 14810 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuLaunchCooperativeKernelMultiDevice(CUDA_LAUNCH_PARAMS *launchParamsList, unsigned int numDevices, unsigned int flags);
# 14875 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuLaunchHostFunc(CUstream hStream, CUhostFn fn, void *userData);
# 14925 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
# 14959 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
# 14991 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuParamSetSize(CUfunction hfunc, unsigned int numbytes);
# 15024 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuParamSeti(CUfunction hfunc, int offset, unsigned int value);
# 15057 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuParamSetf(CUfunction hfunc, int offset, float value);
# 15092 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
# 15144 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuLaunch(CUfunction f);
# 15198 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuLaunchGrid(CUfunction f, int grid_width, int grid_height);
# 15260 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
# 15285 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
# 15331 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphCreate(CUgraph *phGraph, unsigned int flags);
# 15431 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddKernelNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams);
# 15463 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphKernelNodeGetParams(CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams);
# 15486 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphKernelNodeSetParams(CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams);
# 15534 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_MEMCPY3D *copyParams, CUcontext ctx);
# 15557 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphMemcpyNodeGetParams(CUgraphNode hNode, CUDA_MEMCPY3D *nodeParams);
# 15580 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphMemcpyNodeSetParams(CUgraphNode hNode, const CUDA_MEMCPY3D *nodeParams);
# 15622 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddMemsetNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx);
# 15645 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphMemsetNodeGetParams(CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams);
# 15668 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphMemsetNodeSetParams(CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams);
# 15709 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_HOST_NODE_PARAMS *nodeParams);
# 15732 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphHostNodeGetParams(CUgraphNode hNode, CUDA_HOST_NODE_PARAMS *nodeParams);
# 15755 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphHostNodeSetParams(CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams);
# 15795 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUgraph childGraph);
# 15822 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, CUgraph *phGraph);
# 15860 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies);
# 15901 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddEventRecordNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUevent event);
# 15926 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, CUevent *event_out);
# 15951 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, CUevent event);
# 15993 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddEventWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUevent event);
# 16018 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, CUevent *event_out);
# 16043 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, CUevent event);
# 16091 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddExternalSemaphoresSignalNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams);
# 16122 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExternalSemaphoresSignalNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out);
# 16147 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExternalSemaphoresSignalNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams);
# 16195 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddExternalSemaphoresWaitNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams);
# 16226 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExternalSemaphoresWaitNodeGetParams(CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out);
# 16251 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExternalSemaphoresWaitNodeSetParams(CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams);
# 16309 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddBatchMemOpNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams);
# 16337 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphBatchMemOpNodeGetParams(CUgraphNode hNode, CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams_out);
# 16363 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphBatchMemOpNodeSetParams(CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams);
# 16409 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecBatchMemOpNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams);
# 16484 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddMemAllocNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUDA_MEM_ALLOC_NODE_PARAMS *nodeParams);
# 16509 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphMemAllocNodeGetParams(CUgraphNode hNode, CUDA_MEM_ALLOC_NODE_PARAMS *params_out);
# 16566 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddMemFreeNode(CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, size_t numDependencies, CUdeviceptr dptr);
# 16588 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphMemFreeNodeGetParams(CUgraphNode hNode, CUdeviceptr *dptr_out);
# 16608 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGraphMemTrim(CUdevice device);
# 16636 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value);
# 16661 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceSetGraphMemAttribute(CUdevice device, CUgraphMem_attribute attr, void* value);
# 16686 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph);
# 16712 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphNodeFindInClone(CUgraphNode *phNode, CUgraphNode hOriginalNode, CUgraph hClonedGraph);
# 16743 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type);
# 16774 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, size_t *numNodes);
# 16805 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes, size_t *numRootNodes);
# 16839 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from, CUgraphNode *to, size_t *numEdges);
# 16870 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphNodeGetDependencies(CUgraphNode hNode, CUgraphNode *dependencies, size_t *numDependencies);
# 16902 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphNodeGetDependentNodes(CUgraphNode hNode, CUgraphNode *dependentNodes, size_t *numDependentNodes);
# 16931 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, size_t numDependencies);
# 16963 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphRemoveDependencies(CUgraph hGraph, const CUgraphNode *from, const CUgraphNode *to, size_t numDependencies);
# 16990 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphDestroyNode(CUgraphNode hNode);
# 17028 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphInstantiate_v2(CUgraphExec *phGraphExec, CUgraph hGraph, CUgraphNode *phErrorNode, char *logBuffer, size_t bufferSize);
# 17077 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphInstantiateWithFlags(CUgraphExec *phGraphExec, CUgraph hGraph, unsigned long long flags);
# 17245 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_KERNEL_NODE_PARAMS *nodeParams);
# 17291 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMCPY3D *copyParams, CUcontext ctx);
# 17337 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecMemsetNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx);
# 17374 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams);
# 17417 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, CUgraph childGraph);
# 17457 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event);
# 17497 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, CUgraphNode hNode, CUevent event);
# 17540 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecExternalSemaphoresSignalNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams);
# 17583 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecExternalSemaphoresWaitNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams);
# 17656 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphNodeSetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int isEnabled);
# 17717 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphNodeGetEnabled(CUgraphExec hGraphExec, CUgraphNode hNode, unsigned int *isEnabled);
# 17743 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream);
# 17773 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream);
# 17798 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecDestroy(CUgraphExec hGraphExec);
# 17818 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphDestroy(CUgraph hGraph);
# 17974 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, CUgraphNode *hErrorNode_out, CUgraphExecUpdateResult *updateResult_out);
# 17994 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphKernelNodeCopyAttributes(CUgraphNode dst, CUgraphNode src);
# 18015 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr,
                                      CUkernelNodeAttrValue *value_out);
# 18037 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr,
                                      const CUkernelNodeAttrValue *value);
# 18057 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphDebugDotPrint(CUgraph hGraph, const char *path, unsigned int flags);
# 18093 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuUserObjectCreate(CUuserObject *object_out, void *ptr, CUhostFn destroy,
                                    unsigned int initialRefcount, unsigned int flags);
# 18118 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuUserObjectRetain(CUuserObject object, unsigned int count);
# 18146 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuUserObjectRelease(CUuserObject object, unsigned int count);
# 18174 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphRetainUserObject(CUgraph graph, CUuserObject object, unsigned int count, unsigned int flags);
# 18199 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphReleaseUserObject(CUgraph graph, CUuserObject object, unsigned int count);
# 18238 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize);
# 18280 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, unsigned int flags);
# 18332 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuOccupancyMaxPotentialBlockSize(int *minGridSize, int *blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit);
# 18378 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuOccupancyMaxPotentialBlockSizeWithFlags(int *minGridSize, int *blockSize, CUfunction func, CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, int blockSizeLimit, unsigned int flags);
# 18401 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, CUfunction func, int numBlocks, int blockSize);
# 18446 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);
# 18476 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetMipmappedArray(CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags);
// Texture-reference and surface-reference prototypes pulled in from cuda.h.
// Every declaration in this run is tagged __attribute__((deprecated)) and
// returns a CUresult.
//
// cuTexRefSet* prototypes: each takes the CUtexref handle plus the value(s) to store.
# 18522 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetAddress_v2(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
# 18577 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetAddress2D_v3(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
# 18612 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
# 18658 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
# 18694 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
# 18730 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm);
# 18759 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetMipmapLevelBias(CUtexref hTexRef, float bias);
# 18790 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetMipmapLevelClamp(CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp);
# 18820 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso);
# 18856 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetBorderColor(CUtexref hTexRef, float *pBorderColor);
# 18901 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);
// cuTexRefGet* prototypes: results come back through the leading pointer
// parameter(s); the CUtexref handle follows them.
# 18928 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetAddress_v2(CUdeviceptr *pdptr, CUtexref hTexRef);
# 18955 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
# 18982 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetMipmappedArray(CUmipmappedArray *phMipmappedArray, CUtexref hTexRef);
# 19010 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
# 19036 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
# 19064 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
# 19090 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetMipmapFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
# 19116 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetMipmapLevelBias(float *pbias, CUtexref hTexRef);
# 19143 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetMipmapLevelClamp(float *pminMipmapLevelClamp, float *pmaxMipmapLevelClamp, CUtexref hTexRef);
# 19169 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetMaxAnisotropy(int *pmaxAniso, CUtexref hTexRef);
# 19198 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetBorderColor(float *pBorderColor, CUtexref hTexRef);
# 19223 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);
// Creation and destruction of a CUtexref handle.
# 19248 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefCreate(CUtexref *pTexRef);
# 19268 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuTexRefDestroy(CUtexref hTexRef);
// Surface-reference counterparts operating on a CUsurfref handle.
# 19312 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
# 19335 "/usr/local/cuda-11.7/include/cuda.h" 3
__attribute__((deprecated)) CUresult cuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
// Texture-object and surface-object prototypes from cuda.h (not marked deprecated).
// Each returns CUresult. The Create functions write the new handle through
// their first pointer parameter; the Get*Desc functions write a descriptor
// through theirs.
# 19575 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuTexObjectCreate(CUtexObject *pTexObject, const CUDA_RESOURCE_DESC *pResDesc, const CUDA_TEXTURE_DESC *pTexDesc, const CUDA_RESOURCE_VIEW_DESC *pResViewDesc);
# 19595 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuTexObjectDestroy(CUtexObject texObject);
# 19616 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, CUtexObject texObject);
# 19637 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, CUtexObject texObject);
# 19659 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuTexObjectGetResourceViewDesc(CUDA_RESOURCE_VIEW_DESC *pResViewDesc, CUtexObject texObject);
# 19702 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuSurfObjectCreate(CUsurfObject *pSurfObject, const CUDA_RESOURCE_DESC *pResDesc);
# 19722 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuSurfObjectDestroy(CUsurfObject surfObject);
# 19743 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, CUsurfObject surfObject);
// Peer-access prototypes from cuda.h. All return CUresult.
// The two query functions (cuDeviceCanAccessPeer, cuDeviceGetP2PAttribute)
// report their answer through the leading int* parameter.
# 19785 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceCanAccessPeer(int *canAccessPeer, CUdevice dev, CUdevice peerDev);
# 19838 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxEnablePeerAccess(CUcontext peerContext, unsigned int Flags);
# 19865 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuCtxDisablePeerAccess(CUcontext peerContext);
# 19905 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuDeviceGetP2PAttribute(int* value, CUdevice_P2PAttribute attrib, CUdevice srcDevice, CUdevice dstDevice);
// Graphics-resource prototypes from cuda.h, all operating on CUgraphicsResource
// handles and returning CUresult. The Map/Unmap pair takes a count, an array
// of resources and a CUstream.
# 19949 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphicsUnregisterResource(CUgraphicsResource resource);
# 19989 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);
# 20020 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphicsResourceGetMappedMipmappedArray(CUmipmappedArray *pMipmappedArray, CUgraphicsResource resource);
# 20054 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphicsResourceGetMappedPointer_v2(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource);
# 20095 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphicsResourceSetMapFlags_v2(CUgraphicsResource resource, unsigned int flags);
# 20135 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
# 20172 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
// Prototype only: takes a symbol name, an output slot `pfn` for a function
// pointer, an integer `cudaVersion` and a 64-bit `flags` word; returns CUresult.
# 20239 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuGetProcAddress(const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags);
# 20249 "/usr/local/cuda-11.7/include/cuda.h" 3
// Module loading mode selector: eager is 1, lazy is 2.
typedef enum CUmoduleLoadingMode_enum
{
    CU_MODULE_EAGER_LOADING = 1,
    CU_MODULE_LAZY_LOADING  = 2
} CUmoduleLoadingMode;
// Remaining cuda.h prototypes in this run; each returns CUresult.
// cuModuleGetLoadingMode writes a CUmoduleLoadingMode through `mode`.
# 20270 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuModuleGetLoadingMode(CUmoduleLoadingMode *mode);
# 20305 "/usr/local/cuda-11.7/include/cuda.h" 3
CUresult cuMemGetHandleForAddressRange(void *handle, CUdeviceptr dptr, size_t size, CUmemRangeHandleType handleType, unsigned long long flags);


// Takes an output slot for a table pointer and the CUuuid identifying the table.
CUresult cuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId);
# 20681 "/usr/local/cuda-11.7/include/cuda.h" 3
}
# 56 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 73 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_builtin_vars.h" 1 3
# 14 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_builtin_vars.h" 3
struct uint3;
struct dim3;
# 52 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_builtin_vars.h" 3
struct __cuda_builtin_threadIdx_t {
  __declspec(property(get = __fetch_builtin_x)) unsigned int x; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_x(void) { return __nvvm_read_ptx_sreg_tid_x(); };
  __declspec(property(get = __fetch_builtin_y)) unsigned int y; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_y(void) { return __nvvm_read_ptx_sreg_tid_y(); };
  __declspec(property(get = __fetch_builtin_z)) unsigned int z; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_z(void) { return __nvvm_read_ptx_sreg_tid_z(); };


  __attribute__((device)) operator dim3() const;
  __attribute__((device)) operator uint3() const;

private:
  __attribute__((device)) __cuda_builtin_threadIdx_t() =delete; __attribute__((device)) __cuda_builtin_threadIdx_t(const __cuda_builtin_threadIdx_t &) =delete; __attribute__((device)) void operator=(const __cuda_builtin_threadIdx_t &) const =delete; __attribute__((device)) __cuda_builtin_threadIdx_t *operator&() const =delete;
};

struct __cuda_builtin_blockIdx_t {
  __declspec(property(get = __fetch_builtin_x)) unsigned int x; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_x(void) { return __nvvm_read_ptx_sreg_ctaid_x(); };
  __declspec(property(get = __fetch_builtin_y)) unsigned int y; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_y(void) { return __nvvm_read_ptx_sreg_ctaid_y(); };
  __declspec(property(get = __fetch_builtin_z)) unsigned int z; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_z(void) { return __nvvm_read_ptx_sreg_ctaid_z(); };


  __attribute__((device)) operator dim3() const;
  __attribute__((device)) operator uint3() const;

private:
  __attribute__((device)) __cuda_builtin_blockIdx_t() =delete; __attribute__((device)) __cuda_builtin_blockIdx_t(const __cuda_builtin_blockIdx_t &) =delete; __attribute__((device)) void operator=(const __cuda_builtin_blockIdx_t &) const =delete; __attribute__((device)) __cuda_builtin_blockIdx_t *operator&() const =delete;
};

struct __cuda_builtin_blockDim_t {
  __declspec(property(get = __fetch_builtin_x)) unsigned int x; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_x(void) { return __nvvm_read_ptx_sreg_ntid_x(); };
  __declspec(property(get = __fetch_builtin_y)) unsigned int y; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_y(void) { return __nvvm_read_ptx_sreg_ntid_y(); };
  __declspec(property(get = __fetch_builtin_z)) unsigned int z; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_z(void) { return __nvvm_read_ptx_sreg_ntid_z(); };


  __attribute__((device)) operator dim3() const;
  __attribute__((device)) operator uint3() const;

private:
  __attribute__((device)) __cuda_builtin_blockDim_t() =delete; __attribute__((device)) __cuda_builtin_blockDim_t(const __cuda_builtin_blockDim_t &) =delete; __attribute__((device)) void operator=(const __cuda_builtin_blockDim_t &) const =delete; __attribute__((device)) __cuda_builtin_blockDim_t *operator&() const =delete;
};

struct __cuda_builtin_gridDim_t {
  __declspec(property(get = __fetch_builtin_x)) unsigned int x; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_x(void) { return __nvvm_read_ptx_sreg_nctaid_x(); };
  __declspec(property(get = __fetch_builtin_y)) unsigned int y; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_y(void) { return __nvvm_read_ptx_sreg_nctaid_y(); };
  __declspec(property(get = __fetch_builtin_z)) unsigned int z; static inline __attribute__((always_inline)) __attribute__((device)) unsigned int __fetch_builtin_z(void) { return __nvvm_read_ptx_sreg_nctaid_z(); };


  __attribute__((device)) operator dim3() const;
  __attribute__((device)) operator uint3() const;

private:
  __attribute__((device)) __cuda_builtin_gridDim_t() =delete; __attribute__((device)) __cuda_builtin_gridDim_t(const __cuda_builtin_gridDim_t &) =delete; __attribute__((device)) void operator=(const __cuda_builtin_gridDim_t &) const =delete; __attribute__((device)) __cuda_builtin_gridDim_t *operator&() const =delete;
};


// The four built-in index/dimension objects. Each is declared (not defined)
// as an extern, const, weak, device-attributed object of the matching
// __cuda_builtin_*_t type.
extern const __attribute__((device)) __attribute__((weak)) __cuda_builtin_threadIdx_t threadIdx;
extern const __attribute__((device)) __attribute__((weak)) __cuda_builtin_blockIdx_t blockIdx;
extern const __attribute__((device)) __attribute__((weak)) __cuda_builtin_blockDim_t blockDim;
extern const __attribute__((device)) __attribute__((weak)) __cuda_builtin_gridDim_t gridDim;


// Device-attributed integer constant, fixed at 32.
__attribute__((device)) const int warpSize = 32;
# 74 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 98 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/local/cuda-11.7/include/host_defines.h" 1 3
# 60 "/usr/local/cuda-11.7/include/host_defines.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 61 "/usr/local/cuda-11.7/include/host_defines.h" 2 3
# 99 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3

# 1 "/usr/local/cuda-11.7/include/driver_types.h" 1 3
# 59 "/usr/local/cuda-11.7/include/driver_types.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 60 "/usr/local/cuda-11.7/include/driver_types.h" 2 3

# 1 "/usr/local/cuda-11.7/include/vector_types.h" 1 3
# 65 "/usr/local/cuda-11.7/include/vector_types.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 66 "/usr/local/cuda-11.7/include/vector_types.h" 2 3
# 100 "/usr/local/cuda-11.7/include/vector_types.h" 3
// 8-bit vector types with 1 to 4 components (signed `char*`, unsigned `uchar*`).
// The 2- and 4-component forms carry an explicit alignment equal to their
// size; the 1- and 3-component forms carry none.
struct __attribute__((device_builtin)) char1 { signed char x; };
struct __attribute__((device_builtin)) uchar1 { unsigned char x; };

struct __attribute__((device_builtin)) __attribute__((aligned(2))) char2 { signed char x; signed char y; };
struct __attribute__((device_builtin)) __attribute__((aligned(2))) uchar2 { unsigned char x; unsigned char y; };

struct __attribute__((device_builtin)) char3 { signed char x; signed char y; signed char z; };
struct __attribute__((device_builtin)) uchar3 { unsigned char x; unsigned char y; unsigned char z; };

struct __attribute__((device_builtin)) __attribute__((aligned(4))) char4 { signed char x; signed char y; signed char z; signed char w; };
struct __attribute__((device_builtin)) __attribute__((aligned(4))) uchar4 { unsigned char x; unsigned char y; unsigned char z; unsigned char w; };

// 16-bit vector types with 1 to 4 components (signed `short*`, unsigned `ushort*`).
// short2/ushort2 are aligned to 4 bytes and short4/ushort4 to 8 bytes.
struct __attribute__((device_builtin)) short1 { short x; };
struct __attribute__((device_builtin)) ushort1 { unsigned short x; };

struct __attribute__((device_builtin)) __attribute__((aligned(4))) short2 { short x; short y; };
struct __attribute__((device_builtin)) __attribute__((aligned(4))) ushort2 { unsigned short x; unsigned short y; };

struct __attribute__((device_builtin)) short3 { short x; short y; short z; };
struct __attribute__((device_builtin)) ushort3 { unsigned short x; unsigned short y; unsigned short z; };

struct __attribute__((device_builtin)) __attribute__((aligned(8))) short4
{
    short x, y, z, w;
};
struct __attribute__((device_builtin)) __attribute__((aligned(8))) ushort4
{
    unsigned short x, y, z, w;
};

// int / unsigned int vector types with 1 to 4 components.
// int2/uint2 are aligned to 8 bytes and int4/uint4 to 16 bytes.
struct __attribute__((device_builtin)) int1 { int x; };
struct __attribute__((device_builtin)) uint1 { unsigned int x; };

struct __attribute__((device_builtin)) __attribute__((aligned(8))) int2
{
    int x, y;
};
struct __attribute__((device_builtin)) __attribute__((aligned(8))) uint2
{
    unsigned int x, y;
};

struct __attribute__((device_builtin)) int3 { int x; int y; int z; };
struct __attribute__((device_builtin)) uint3 { unsigned int x; unsigned int y; unsigned int z; };

struct __attribute__((device_builtin)) __attribute__((aligned(16))) int4 { int x; int y; int z; int w; };
struct __attribute__((device_builtin)) __attribute__((aligned(16))) uint4 { unsigned int x; unsigned int y; unsigned int z; unsigned int w; };

// long / unsigned long vector types with 1 to 4 components.
// The 2-component forms are aligned to twice the size of their element type;
// the 4-component forms are aligned to 16 bytes.
struct __attribute__((device_builtin)) long1 { long int x; };
struct __attribute__((device_builtin)) ulong1 { unsigned long x; };

struct __attribute__((device_builtin)) __attribute__((aligned(2*sizeof(long int)))) long2 { long int x; long int y; };
struct __attribute__((device_builtin)) __attribute__((aligned(2*sizeof(unsigned long int)))) ulong2 { unsigned long int x; unsigned long int y; };

struct __attribute__((device_builtin)) long3 { long int x; long int y; long int z; };
struct __attribute__((device_builtin)) ulong3 { unsigned long int x; unsigned long int y; unsigned long int z; };

struct __attribute__((device_builtin)) __attribute__((aligned(16))) long4 { long int x; long int y; long int z; long int w; };
struct __attribute__((device_builtin)) __attribute__((aligned(16))) ulong4 { unsigned long int x; unsigned long int y; unsigned long int z; unsigned long int w; };

// float vector types with 1 to 4 components.
// float2 is aligned to 8 bytes and float4 to 16 bytes.
struct __attribute__((device_builtin)) float1 { float x; };
# 276 "/usr/local/cuda-11.7/include/vector_types.h" 3
struct __attribute__((device_builtin)) __attribute__((aligned(8))) float2
{
    float x, y;
};

struct __attribute__((device_builtin)) float3 { float x; float y; float z; };

struct __attribute__((device_builtin)) __attribute__((aligned(16))) float4 { float x; float y; float z; float w; };

// long long / unsigned long long vector types with 1 to 4 components.
// The 2- and 4-component forms are aligned to 16 bytes.
struct __attribute__((device_builtin)) longlong1 { long long int x; };
struct __attribute__((device_builtin)) ulonglong1 { unsigned long long int x; };

struct __attribute__((device_builtin)) __attribute__((aligned(16))) longlong2 { long long int x; long long int y; };
struct __attribute__((device_builtin)) __attribute__((aligned(16))) ulonglong2 { unsigned long long int x; unsigned long long int y; };

struct __attribute__((device_builtin)) longlong3 { long long int x; long long int y; long long int z; };
struct __attribute__((device_builtin)) ulonglong3 { unsigned long long int x; unsigned long long int y; unsigned long long int z; };

struct __attribute__((device_builtin)) __attribute__((aligned(16))) longlong4 { long long int x; long long int y; long long int z; long long int w; };
struct __attribute__((device_builtin)) __attribute__((aligned(16))) ulonglong4 { unsigned long long int x; unsigned long long int y; unsigned long long int z; unsigned long long int w; };

// double vector types with 1 to 4 components.
// double2 and double4 are aligned to 16 bytes.
struct __attribute__((device_builtin)) double1 { double x; };

struct __attribute__((device_builtin)) __attribute__((aligned(16))) double2 { double x; double y; };

struct __attribute__((device_builtin)) double3 { double x; double y; double z; };

struct __attribute__((device_builtin)) __attribute__((aligned(16))) double4 { double x; double y; double z; double w; };
// One typedef per vector struct, giving each `struct T` an ordinary type name
// `T` of the same spelling. Every typedef repeats the device_builtin attribute.
# 363 "/usr/local/cuda-11.7/include/vector_types.h" 3
typedef __attribute__((device_builtin)) struct char1 char1;
typedef __attribute__((device_builtin)) struct uchar1 uchar1;
typedef __attribute__((device_builtin)) struct char2 char2;
typedef __attribute__((device_builtin)) struct uchar2 uchar2;
typedef __attribute__((device_builtin)) struct char3 char3;
typedef __attribute__((device_builtin)) struct uchar3 uchar3;
typedef __attribute__((device_builtin)) struct char4 char4;
typedef __attribute__((device_builtin)) struct uchar4 uchar4;
typedef __attribute__((device_builtin)) struct short1 short1;
typedef __attribute__((device_builtin)) struct ushort1 ushort1;
typedef __attribute__((device_builtin)) struct short2 short2;
typedef __attribute__((device_builtin)) struct ushort2 ushort2;
typedef __attribute__((device_builtin)) struct short3 short3;
typedef __attribute__((device_builtin)) struct ushort3 ushort3;
typedef __attribute__((device_builtin)) struct short4 short4;
typedef __attribute__((device_builtin)) struct ushort4 ushort4;
typedef __attribute__((device_builtin)) struct int1 int1;
typedef __attribute__((device_builtin)) struct uint1 uint1;
typedef __attribute__((device_builtin)) struct int2 int2;
typedef __attribute__((device_builtin)) struct uint2 uint2;
typedef __attribute__((device_builtin)) struct int3 int3;
typedef __attribute__((device_builtin)) struct uint3 uint3;
typedef __attribute__((device_builtin)) struct int4 int4;
typedef __attribute__((device_builtin)) struct uint4 uint4;
typedef __attribute__((device_builtin)) struct long1 long1;
typedef __attribute__((device_builtin)) struct ulong1 ulong1;
typedef __attribute__((device_builtin)) struct long2 long2;
typedef __attribute__((device_builtin)) struct ulong2 ulong2;
typedef __attribute__((device_builtin)) struct long3 long3;
typedef __attribute__((device_builtin)) struct ulong3 ulong3;
typedef __attribute__((device_builtin)) struct long4 long4;
typedef __attribute__((device_builtin)) struct ulong4 ulong4;
typedef __attribute__((device_builtin)) struct float1 float1;
typedef __attribute__((device_builtin)) struct float2 float2;
typedef __attribute__((device_builtin)) struct float3 float3;
typedef __attribute__((device_builtin)) struct float4 float4;
typedef __attribute__((device_builtin)) struct longlong1 longlong1;
typedef __attribute__((device_builtin)) struct ulonglong1 ulonglong1;
typedef __attribute__((device_builtin)) struct longlong2 longlong2;
typedef __attribute__((device_builtin)) struct ulonglong2 ulonglong2;
typedef __attribute__((device_builtin)) struct longlong3 longlong3;
typedef __attribute__((device_builtin)) struct ulonglong3 ulonglong3;
typedef __attribute__((device_builtin)) struct longlong4 longlong4;
typedef __attribute__((device_builtin)) struct ulonglong4 ulonglong4;
typedef __attribute__((device_builtin)) struct double1 double1;
typedef __attribute__((device_builtin)) struct double2 double2;
typedef __attribute__((device_builtin)) struct double3 double3;
typedef __attribute__((device_builtin)) struct double4 double4;


// Three unsigned components (x, y, z), constructible on host and device.
struct __attribute__((device_builtin)) dim3
{
    unsigned int x;
    unsigned int y;
    unsigned int z;

    // Any component not supplied defaults to 1.
    __attribute__((host)) __attribute__((device))
    constexpr dim3(unsigned int vx = 1, unsigned int vy = 1, unsigned int vz = 1)
        : x(vx), y(vy), z(vz)
    {
    }

    // Copies the three components of a uint3.
    __attribute__((host)) __attribute__((device))
    constexpr dim3(uint3 v)
        : x(v.x), y(v.y), z(v.z)
    {
    }

    // Converts back to a uint3 holding the same three components.
    __attribute__((host)) __attribute__((device))
    constexpr operator uint3() const
    {
        return uint3{x, y, z};
    }
};

typedef __attribute__((device_builtin)) struct dim3 dim3;
# 62 "/usr/local/cuda-11.7/include/driver_types.h" 2 3
# 81 "/usr/local/cuda-11.7/include/driver_types.h" 3
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/limits.h" 1 3
# 21 "/usr/lib/llvm-14/lib/clang/14.0.0/include/limits.h" 3
# 1 "/usr/include/limits.h" 1 3 4
# 26 "/usr/include/limits.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/libc-header-start.h" 1 3 4
# 27 "/usr/include/limits.h" 2 3 4
# 195 "/usr/include/limits.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/posix1_lim.h" 1 3 4
# 27 "/usr/include/x86_64-linux-gnu/bits/posix1_lim.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/wordsize.h" 1 3 4
# 28 "/usr/include/x86_64-linux-gnu/bits/posix1_lim.h" 2 3 4
# 161 "/usr/include/x86_64-linux-gnu/bits/posix1_lim.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/local_lim.h" 1 3 4
# 38 "/usr/include/x86_64-linux-gnu/bits/local_lim.h" 3 4
# 1 "/usr/include/linux/limits.h" 1 3 4
# 39 "/usr/include/x86_64-linux-gnu/bits/local_lim.h" 2 3 4
# 81 "/usr/include/x86_64-linux-gnu/bits/local_lim.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/pthread_stack_min-dynamic.h" 1 3 4
# 23 "/usr/include/x86_64-linux-gnu/bits/pthread_stack_min-dynamic.h" 3 4
// C-linkage declaration: takes an int, returns long int, declared noexcept(true).
extern "C" long int __sysconf(int __name) noexcept(true);
# 82 "/usr/include/x86_64-linux-gnu/bits/local_lim.h" 2 3 4
# 162 "/usr/include/x86_64-linux-gnu/bits/posix1_lim.h" 2 3 4
# 196 "/usr/include/limits.h" 2 3 4


# 1 "/usr/include/x86_64-linux-gnu/bits/posix2_lim.h" 1 3 4
# 200 "/usr/include/limits.h" 2 3 4


# 1 "/usr/include/x86_64-linux-gnu/bits/xopen_lim.h" 1 3 4
# 64 "/usr/include/x86_64-linux-gnu/bits/xopen_lim.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/uio_lim.h" 1 3 4
# 65 "/usr/include/x86_64-linux-gnu/bits/xopen_lim.h" 2 3 4
# 204 "/usr/include/limits.h" 2 3 4
# 22 "/usr/lib/llvm-14/lib/clang/14.0.0/include/limits.h" 2 3
# 82 "/usr/local/cuda-11.7/include/driver_types.h" 2 3
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stddef.h" 1 3
# 35 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stddef.h" 3
typedef long int ptrdiff_t;  // ptrdiff_t is spelled as long int in this preprocessed output
# 102 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stddef.h" 3
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__stddef_max_align_t.h" 1 3
# 19 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__stddef_max_align_t.h" 3
// Holds one long long and one long double member, each explicitly aligned to
// the natural alignment of its own type.
typedef struct {
  long long __clang_max_align_nonce1 __attribute__((__aligned__(__alignof__(long long))));
  long double __clang_max_align_nonce2 __attribute__((__aligned__(__alignof__(long double))));
} max_align_t;
# 103 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stddef.h" 2 3
# 83 "/usr/local/cuda-11.7/include/driver_types.h" 2 3
# 204 "/usr/local/cuda-11.7/include/driver_types.h" 3
// Runtime error codes. Every enumerator has an explicit value and the
// numbering has gaps, so the values below must be kept exactly as written.
enum __attribute__((device_builtin)) cudaError
{
    cudaSuccess = 0,

    // 1 .. 9
    cudaErrorInvalidValue = 1,
    cudaErrorMemoryAllocation = 2,
    cudaErrorInitializationError = 3,
    cudaErrorCudartUnloading = 4,
    cudaErrorProfilerDisabled = 5,
    cudaErrorProfilerNotInitialized = 6,
    cudaErrorProfilerAlreadyStarted = 7,
    cudaErrorProfilerAlreadyStopped = 8,
    cudaErrorInvalidConfiguration = 9,

    // 12 .. 37
    cudaErrorInvalidPitchValue = 12,
    cudaErrorInvalidSymbol = 13,
    cudaErrorInvalidHostPointer = 16,
    cudaErrorInvalidDevicePointer = 17,
    cudaErrorInvalidTexture = 18,
    cudaErrorInvalidTextureBinding = 19,
    cudaErrorInvalidChannelDescriptor = 20,
    cudaErrorInvalidMemcpyDirection = 21,
    cudaErrorAddressOfConstant = 22,
    cudaErrorTextureFetchFailed = 23,
    cudaErrorTextureNotBound = 24,
    cudaErrorSynchronizationError = 25,
    cudaErrorInvalidFilterSetting = 26,
    cudaErrorInvalidNormSetting = 27,
    cudaErrorMixedDeviceExecution = 28,
    cudaErrorNotYetImplemented = 31,
    cudaErrorMemoryValueTooLarge = 32,
    cudaErrorStubLibrary = 34,
    cudaErrorInsufficientDriver = 35,
    cudaErrorCallRequiresNewerDriver = 36,
    cudaErrorInvalidSurface = 37,

    // 43 .. 69
    cudaErrorDuplicateVariableName = 43,
    cudaErrorDuplicateTextureName = 44,
    cudaErrorDuplicateSurfaceName = 45,
    cudaErrorDevicesUnavailable = 46,
    cudaErrorIncompatibleDriverContext = 49,
    cudaErrorMissingConfiguration = 52,
    cudaErrorPriorLaunchFailure = 53,
    cudaErrorLaunchMaxDepthExceeded = 65,
    cudaErrorLaunchFileScopedTex = 66,
    cudaErrorLaunchFileScopedSurf = 67,
    cudaErrorSyncDepthExceeded = 68,
    cudaErrorLaunchPendingCountExceeded = 69,

    // 98 .. 127
    cudaErrorInvalidDeviceFunction = 98,
    cudaErrorNoDevice = 100,
    cudaErrorInvalidDevice = 101,
    cudaErrorDeviceNotLicensed = 102,
    cudaErrorSoftwareValidityNotEstablished = 103,
    cudaErrorStartupFailure = 127,

    // 200 .. 224
    cudaErrorInvalidKernelImage = 200,
    cudaErrorDeviceUninitialized = 201,
    cudaErrorMapBufferObjectFailed = 205,
    cudaErrorUnmapBufferObjectFailed = 206,
    cudaErrorArrayIsMapped = 207,
    cudaErrorAlreadyMapped = 208,
    cudaErrorNoKernelImageForDevice = 209,
    cudaErrorAlreadyAcquired = 210,
    cudaErrorNotMapped = 211,
    cudaErrorNotMappedAsArray = 212,
    cudaErrorNotMappedAsPointer = 213,
    cudaErrorECCUncorrectable = 214,
    cudaErrorUnsupportedLimit = 215,
    cudaErrorDeviceAlreadyInUse = 216,
    cudaErrorPeerAccessUnsupported = 217,
    cudaErrorInvalidPtx = 218,
    cudaErrorInvalidGraphicsContext = 219,
    cudaErrorNvlinkUncorrectable = 220,
    cudaErrorJitCompilerNotFound = 221,
    cudaErrorUnsupportedPtxVersion = 222,
    cudaErrorJitCompilationDisabled = 223,
    cudaErrorUnsupportedExecAffinity = 224,

    // 300 .. 304
    cudaErrorInvalidSource = 300,
    cudaErrorFileNotFound = 301,
    cudaErrorSharedObjectSymbolNotFound = 302,
    cudaErrorSharedObjectInitFailed = 303,
    cudaErrorOperatingSystem = 304,

    // 400 .. 600
    cudaErrorInvalidResourceHandle = 400,
    cudaErrorIllegalState = 401,
    cudaErrorSymbolNotFound = 500,
    cudaErrorNotReady = 600,

    // 700 .. 720
    cudaErrorIllegalAddress = 700,
    cudaErrorLaunchOutOfResources = 701,
    cudaErrorLaunchTimeout = 702,
    cudaErrorLaunchIncompatibleTexturing = 703,
    cudaErrorPeerAccessAlreadyEnabled = 704,
    cudaErrorPeerAccessNotEnabled = 705,
    cudaErrorSetOnActiveProcess = 708,
    cudaErrorContextIsDestroyed = 709,
    cudaErrorAssert = 710,
    cudaErrorTooManyPeers = 711,
    cudaErrorHostMemoryAlreadyRegistered = 712,
    cudaErrorHostMemoryNotRegistered = 713,
    cudaErrorHardwareStackError = 714,
    cudaErrorIllegalInstruction = 715,
    cudaErrorMisalignedAddress = 716,
    cudaErrorInvalidAddressSpace = 717,
    cudaErrorInvalidPc = 718,
    cudaErrorLaunchFailure = 719,
    cudaErrorCooperativeLaunchTooLarge = 720,

    // 800 .. 809
    cudaErrorNotPermitted = 800,
    cudaErrorNotSupported = 801,
    cudaErrorSystemNotReady = 802,
    cudaErrorSystemDriverMismatch = 803,
    cudaErrorCompatNotSupportedOnDevice = 804,
    cudaErrorMpsConnectionFailed = 805,
    cudaErrorMpsRpcFailure = 806,
    cudaErrorMpsServerNotReady = 807,
    cudaErrorMpsMaxClientsReached = 808,
    cudaErrorMpsMaxConnectionsReached = 809,

    // 900 .. 911
    cudaErrorStreamCaptureUnsupported = 900,
    cudaErrorStreamCaptureInvalidated = 901,
    cudaErrorStreamCaptureMerge = 902,
    cudaErrorStreamCaptureUnmatched = 903,
    cudaErrorStreamCaptureUnjoined = 904,
    cudaErrorStreamCaptureIsolation = 905,
    cudaErrorStreamCaptureImplicit = 906,
    cudaErrorCapturedEvent = 907,
    cudaErrorStreamCaptureWrongThread = 908,
    cudaErrorTimeout = 909,
    cudaErrorGraphExecUpdateFailure = 910,
    cudaErrorExternalDevice = 911,

    // 999 and above
    cudaErrorUnknown = 999,
    cudaErrorApiFailureBase = 10000
};


// Channel format kinds, numbered consecutively from 0 to 30.
enum __attribute__((device_builtin)) cudaChannelFormatKind
{
    // Basic kinds
    cudaChannelFormatKindSigned   = 0,
    cudaChannelFormatKindUnsigned = 1,
    cudaChannelFormatKindFloat    = 2,
    cudaChannelFormatKindNone     = 3,
    cudaChannelFormatKindNV12     = 4,

    // Unsigned normalized
    cudaChannelFormatKindUnsignedNormalized8X1  = 5,
    cudaChannelFormatKindUnsignedNormalized8X2  = 6,
    cudaChannelFormatKindUnsignedNormalized8X4  = 7,
    cudaChannelFormatKindUnsignedNormalized16X1 = 8,
    cudaChannelFormatKindUnsignedNormalized16X2 = 9,
    cudaChannelFormatKindUnsignedNormalized16X4 = 10,

    // Signed normalized
    cudaChannelFormatKindSignedNormalized8X1  = 11,
    cudaChannelFormatKindSignedNormalized8X2  = 12,
    cudaChannelFormatKindSignedNormalized8X4  = 13,
    cudaChannelFormatKindSignedNormalized16X1 = 14,
    cudaChannelFormatKindSignedNormalized16X2 = 15,
    cudaChannelFormatKindSignedNormalized16X4 = 16,

    // Block compressed
    cudaChannelFormatKindUnsignedBlockCompressed1     = 17,
    cudaChannelFormatKindUnsignedBlockCompressed1SRGB = 18,
    cudaChannelFormatKindUnsignedBlockCompressed2     = 19,
    cudaChannelFormatKindUnsignedBlockCompressed2SRGB = 20,
    cudaChannelFormatKindUnsignedBlockCompressed3     = 21,
    cudaChannelFormatKindUnsignedBlockCompressed3SRGB = 22,
    cudaChannelFormatKindUnsignedBlockCompressed4     = 23,
    cudaChannelFormatKindSignedBlockCompressed4       = 24,
    cudaChannelFormatKindUnsignedBlockCompressed5     = 25,
    cudaChannelFormatKindSignedBlockCompressed5       = 26,
    cudaChannelFormatKindUnsignedBlockCompressed6H    = 27,
    cudaChannelFormatKindSignedBlockCompressed6H      = 28,
    cudaChannelFormatKindUnsignedBlockCompressed7     = 29,
    cudaChannelFormatKindUnsignedBlockCompressed7SRGB = 30
};


// Four int fields (x, y, z, w) followed by a cudaChannelFormatKind.
struct __attribute__((device_builtin)) cudaChannelFormatDesc
{
    int x, y, z, w;
    enum cudaChannelFormatKind f;
};


// cudaArray and cudaMipmappedArray are only forward-declared here; code uses
// them through the pointer typedefs below (mutable and pointer-to-const forms).
struct cudaArray;
struct cudaMipmappedArray;

typedef struct cudaArray *cudaArray_t;
typedef const struct cudaArray *cudaArray_const_t;

typedef struct cudaMipmappedArray *cudaMipmappedArray_t;
typedef const struct cudaMipmappedArray *cudaMipmappedArray_const_t;
# 1160 "/usr/local/cuda-11.7/include/driver_types.h" 3
// Sparse-array properties: a width/height/depth tile extent, two mip-tail
// fields, a flags word and four reserved words.
struct __attribute__((device_builtin)) cudaArraySparseProperties
{
    struct
    {
        unsigned int width, height, depth;
    } tileExtent;

    unsigned int miptailFirstLevel;
    unsigned long long miptailSize;
    unsigned int flags;
    unsigned int reserved[4];
};


// A size and an alignment, followed by four reserved words.
struct __attribute__((device_builtin)) cudaArrayMemoryRequirements
{
    size_t size, alignment;
    unsigned int reserved[4];
};


// Memory type tags, numbered 0 through 3.
enum __attribute__((device_builtin)) cudaMemoryType {
    cudaMemoryTypeUnregistered = 0,
    cudaMemoryTypeHost         = 1,
    cudaMemoryTypeDevice       = 2,
    cudaMemoryTypeManaged      = 3
};


// Copy-direction tags, numbered 0 through 4.
enum __attribute__((device_builtin)) cudaMemcpyKind {
    cudaMemcpyHostToHost     = 0,
    cudaMemcpyHostToDevice   = 1,
    cudaMemcpyDeviceToHost   = 2,
    cudaMemcpyDeviceToDevice = 3,
    cudaMemcpyDefault        = 4
};


// A raw pointer followed by three size_t fields: pitch, xsize and ysize.
struct __attribute__((device_builtin)) cudaPitchedPtr {
    void *ptr;
    size_t pitch, xsize, ysize;
};


/* Three-component extent: width, height and depth, each a size_t. */
struct __attribute__((device_builtin)) cudaExtent {
    size_t width, height, depth;
};


/* Three-component position: x, y and z, each a size_t. */
struct __attribute__((device_builtin)) cudaPos {
    size_t x, y, z;
};


/* Parameter record for a 3D copy. Source and destination are each described
 * by an array handle, a position and a pitched pointer; the record ends with
 * the copy extent and the copy kind. */
struct __attribute__((device_builtin)) cudaMemcpy3DParms
{
    cudaArray_t srcArray;          /* source array handle */
    struct cudaPos srcPos;         /* source position */
    struct cudaPitchedPtr srcPtr;  /* source pitched pointer */

    cudaArray_t dstArray;          /* destination array handle */
    struct cudaPos dstPos;         /* destination position */
    struct cudaPitchedPtr dstPtr;  /* destination pitched pointer */

    struct cudaExtent extent;      /* extent of the copy */
    enum cudaMemcpyKind kind;      /* copy kind selector */
};


/* Parameter record for a 3D copy with explicit source and destination
 * devices. Same source/destination description as cudaMemcpy3DParms, plus a
 * device number per side; there is no copy-kind field. */
struct __attribute__((device_builtin)) cudaMemcpy3DPeerParms
{
    cudaArray_t srcArray;          /* source array handle */
    struct cudaPos srcPos;         /* source position */
    struct cudaPitchedPtr srcPtr;  /* source pitched pointer */
    int srcDevice;                 /* source device number */

    cudaArray_t dstArray;          /* destination array handle */
    struct cudaPos dstPos;         /* destination position */
    struct cudaPitchedPtr dstPtr;  /* destination pitched pointer */
    int dstDevice;                 /* destination device number */

    struct cudaExtent extent;      /* extent of the copy */
};


/* Memset parameter record: destination pointer, pitch, fill value, element
 * size, then width and height. Field order is unchanged. */
struct __attribute__((device_builtin)) cudaMemsetParams
{
    void *dst;
    size_t pitch;
    unsigned int value, elementSize;
    size_t width, height;
};


/* Access property selector; enumerators and values unchanged (0..2). */
enum __attribute__((device_builtin)) cudaAccessProperty
{
    cudaAccessPropertyNormal     = 0x0,
    cudaAccessPropertyStreaming  = 0x1,
    cudaAccessPropertyPersisting = 0x2
};
# 1310 "/usr/local/cuda-11.7/include/driver_types.h" 3
/* Access policy window: a base pointer and byte count, a float ratio, and
 * one access property each for the "hit" and "miss" cases. */
struct __attribute__((device_builtin)) cudaAccessPolicyWindow {
    void *base_ptr;                    /* start of the window */
    size_t num_bytes;                  /* window size in bytes (per the field name) */
    float hitRatio;                    /* presumably a fraction in [0, 1] - confirm against the CUDA docs */
    enum cudaAccessProperty hitProp;   /* property for the "hit" case */
    enum cudaAccessProperty missProp;  /* property for the "miss" case */
};
# 1328 "/usr/local/cuda-11.7/include/driver_types.h" 3
/* Pointer to a function taking one void* argument (userData) and returning void. */
typedef void ( *cudaHostFn_t)(void *userData);


/* Host node parameters: a cudaHostFn_t callback paired with the void* to pass to it. */
struct __attribute__((device_builtin)) cudaHostNodeParams {
    cudaHostFn_t fn;   /* callback */
    void* userData;    /* presumably forwarded as the callback's argument - confirm */
};


/* Stream capture status; enumerators and values unchanged (0..2). */
enum __attribute__((device_builtin)) cudaStreamCaptureStatus
{
    cudaStreamCaptureStatusNone        = 0x0,
    cudaStreamCaptureStatusActive      = 0x1,
    cudaStreamCaptureStatusInvalidated = 0x2
};


/* Stream capture mode; enumerators and values unchanged (0..2). */
enum __attribute__((device_builtin)) cudaStreamCaptureMode
{
    cudaStreamCaptureModeGlobal      = 0x0,
    cudaStreamCaptureModeThreadLocal = 0x1,
    cudaStreamCaptureModeRelaxed     = 0x2
};

/* Synchronization policy selector. Numbering starts at 1; values unchanged. */
enum __attribute__((device_builtin)) cudaSynchronizationPolicy
{
    cudaSyncPolicyAuto         = 0x1,
    cudaSyncPolicySpin         = 0x2,
    cudaSyncPolicyYield        = 0x3,
    cudaSyncPolicyBlockingSync = 0x4
};
# 1379 "/usr/local/cuda-11.7/include/driver_types.h" 3
/* Flags for updating capture dependencies: "add" is 0, "set" is 1. */
enum __attribute__((device_builtin)) cudaStreamUpdateCaptureDependenciesFlags
{
    cudaStreamAddCaptureDependencies = 0,
    cudaStreamSetCaptureDependencies = 1
};


/* User object flags; the single enumerator has value 1. */
enum __attribute__((device_builtin)) cudaUserObjectFlags
{
    cudaUserObjectNoDestructorSync = 1
};


/* User object retain flags; the single enumerator has value 1. */
enum __attribute__((device_builtin)) cudaUserObjectRetainFlags
{
    cudaGraphUserObjectMove = 1
};


/* Incomplete type: only forward-declared in this file, so it can only be used via pointers. */
struct cudaGraphicsResource;


/* Graphics registration flags. Non-zero values are distinct single bits
 * (1, 2, 4, 8); values unchanged. */
enum __attribute__((device_builtin)) cudaGraphicsRegisterFlags {
    cudaGraphicsRegisterFlagsNone             = 0x0,
    cudaGraphicsRegisterFlagsReadOnly         = 0x1,
    cudaGraphicsRegisterFlagsWriteDiscard     = 0x2,
    cudaGraphicsRegisterFlagsSurfaceLoadStore = 0x4,
    cudaGraphicsRegisterFlagsTextureGather    = 0x8
};


/* Graphics map flags; enumerators and values unchanged (0..2). */
enum __attribute__((device_builtin)) cudaGraphicsMapFlags {
    cudaGraphicsMapFlagsNone         = 0x0,
    cudaGraphicsMapFlagsReadOnly     = 0x1,
    cudaGraphicsMapFlagsWriteDiscard = 0x2
};


/* Cube face index: +X, -X, +Y, -Y, +Z, -Z map to 0..5. Values unchanged. */
enum __attribute__((device_builtin)) cudaGraphicsCubeFace {
    cudaGraphicsCubeFacePositiveX = 0,
    cudaGraphicsCubeFaceNegativeX = 1,
    cudaGraphicsCubeFacePositiveY = 2,
    cudaGraphicsCubeFaceNegativeY = 3,
    cudaGraphicsCubeFacePositiveZ = 4,
    cudaGraphicsCubeFaceNegativeZ = 5
};


/* Resource type tag (0..3). The enumerator suffixes match the member names
 * of the `res` union in cudaResourceDesc. Values unchanged. */
enum __attribute__((device_builtin)) cudaResourceType {
    cudaResourceTypeArray          = 0,
    cudaResourceTypeMipmappedArray = 1,
    cudaResourceTypeLinear         = 2,
    cudaResourceTypePitch2D        = 3
};


/* Resource view format identifiers, numbered contiguously from 0 to 34.
 * Values are the same as before, written in decimal. */
enum __attribute__((device_builtin)) cudaResourceViewFormat {
    cudaResViewFormatNone                      = 0,
    /* 8-bit unsigned / signed channels */
    cudaResViewFormatUnsignedChar1             = 1,
    cudaResViewFormatUnsignedChar2             = 2,
    cudaResViewFormatUnsignedChar4             = 3,
    cudaResViewFormatSignedChar1               = 4,
    cudaResViewFormatSignedChar2               = 5,
    cudaResViewFormatSignedChar4               = 6,
    /* 16-bit unsigned / signed channels */
    cudaResViewFormatUnsignedShort1            = 7,
    cudaResViewFormatUnsignedShort2            = 8,
    cudaResViewFormatUnsignedShort4            = 9,
    cudaResViewFormatSignedShort1              = 10,
    cudaResViewFormatSignedShort2              = 11,
    cudaResViewFormatSignedShort4              = 12,
    /* 32-bit unsigned / signed channels */
    cudaResViewFormatUnsignedInt1              = 13,
    cudaResViewFormatUnsignedInt2              = 14,
    cudaResViewFormatUnsignedInt4              = 15,
    cudaResViewFormatSignedInt1                = 16,
    cudaResViewFormatSignedInt2                = 17,
    cudaResViewFormatSignedInt4                = 18,
    /* half and float channels */
    cudaResViewFormatHalf1                     = 19,
    cudaResViewFormatHalf2                     = 20,
    cudaResViewFormatHalf4                     = 21,
    cudaResViewFormatFloat1                    = 22,
    cudaResViewFormatFloat2                    = 23,
    cudaResViewFormatFloat4                    = 24,
    /* block-compressed formats */
    cudaResViewFormatUnsignedBlockCompressed1  = 25,
    cudaResViewFormatUnsignedBlockCompressed2  = 26,
    cudaResViewFormatUnsignedBlockCompressed3  = 27,
    cudaResViewFormatUnsignedBlockCompressed4  = 28,
    cudaResViewFormatSignedBlockCompressed4    = 29,
    cudaResViewFormatUnsignedBlockCompressed5  = 30,
    cudaResViewFormatSignedBlockCompressed5    = 31,
    cudaResViewFormatUnsignedBlockCompressed6H = 32,
    cudaResViewFormatSignedBlockCompressed6H   = 33,
    cudaResViewFormatUnsignedBlockCompressed7  = 34
};


/* Resource descriptor: a type tag followed by a union with one member per
 * resource kind (array, mipmap, linear, pitch2D). The member names mirror
 * the cudaResourceType enumerators; presumably resType selects the active
 * member - confirm against the CUDA docs. */
struct __attribute__((device_builtin)) cudaResourceDesc {
    enum cudaResourceType resType;  /* resource type tag */

    union {
        /* array resource: just the array handle */
        struct {
            cudaArray_t array;
        } array;
        /* mipmapped array resource: just the mipmapped array handle */
        struct {
            cudaMipmappedArray_t mipmap;
        } mipmap;
        /* linear resource: pointer, channel format and total size in bytes */
        struct {
            void *devPtr;
            struct cudaChannelFormatDesc desc;
            size_t sizeInBytes;
        } linear;
        /* pitched 2D resource: pointer, channel format, width/height and pitch in bytes */
        struct {
            void *devPtr;
            struct cudaChannelFormatDesc desc;
            size_t width;
            size_t height;
            size_t pitchInBytes;
        } pitch2D;
    } res;
};


/* Resource view descriptor: view format, three size_t dimensions, then the
 * first/last mipmap level and first/last layer. Field order is unchanged. */
struct __attribute__((device_builtin)) cudaResourceViewDesc {
    enum cudaResourceViewFormat format;
    size_t width, height, depth;
    unsigned int firstMipmapLevel, lastMipmapLevel;
    unsigned int firstLayer, lastLayer;
};


/* Pointer attributes record: a memory type, a device number, and one device
 * pointer plus one host pointer. The embedded linemarkers are preprocessor
 * output and are kept verbatim. */
struct __attribute__((device_builtin)) cudaPointerAttributes
{


    /* memory type classification */
    enum cudaMemoryType type;
# 1554 "/usr/local/cuda-11.7/include/driver_types.h" 3
    /* device number */
    int device;


    /* device-side pointer (per the field name) */
    void *devicePointer;
# 1569 "/usr/local/cuda-11.7/include/driver_types.h" 3
    /* host-side pointer (per the field name) */
    void *hostPointer;
};


/* Function attributes record: three size_t byte counts followed by seven int
 * fields. The embedded linemarkers are preprocessor output and are kept
 * verbatim. */
struct __attribute__((device_builtin)) cudaFuncAttributes
{


   /* shared memory size in bytes (per the field name) */
   size_t sharedSizeBytes;


   /* constant memory size in bytes (per the field name) */
   size_t constSizeBytes;


   /* local memory size in bytes (per the field name) */
   size_t localSizeBytes;


   /* maximum threads per block */
   int maxThreadsPerBlock;


   /* register count */
   int numRegs;


   /* PTX version; encoding not shown here - confirm against the CUDA docs */
   int ptxVersion;


   /* binary version; encoding not shown here - confirm against the CUDA docs */
   int binaryVersion;


   /* cache mode flag; semantics not shown here - confirm against the CUDA docs */
   int cacheModeCA;


   /* maximum dynamic shared memory size in bytes (per the field name) */
   int maxDynamicSharedSizeBytes;
# 1641 "/usr/local/cuda-11.7/include/driver_types.h" 3
   /* preferred shared memory carveout; presumably a percentage, see cudaSharedCarveout - confirm */
   int preferredShmemCarveout;
# 1691 "/usr/local/cuda-11.7/include/driver_types.h" 3
};


/* Function attribute identifiers. Explicit numbering starts at 8;
 * cudaFuncAttributeMax has no initializer, so it takes the next value (10). */
enum __attribute__((device_builtin)) cudaFuncAttribute
{
    cudaFuncAttributeMaxDynamicSharedMemorySize = 8,
    cudaFuncAttributePreferredSharedMemoryCarveout = 9,
# 1708 "/usr/local/cuda-11.7/include/driver_types.h" 3
    cudaFuncAttributeMax
};


/* Function cache preference; enumerators and values unchanged (0..3). */
enum __attribute__((device_builtin)) cudaFuncCache {
    cudaFuncCachePreferNone   = 0x0,
    cudaFuncCachePreferShared = 0x1,
    cudaFuncCachePreferL1     = 0x2,
    cudaFuncCachePreferEqual  = 0x3
};


/* Shared memory bank size configuration; enumerators and values unchanged (0..2). */
enum __attribute__((device_builtin)) cudaSharedMemConfig {
    cudaSharedMemBankSizeDefault   = 0x0,
    cudaSharedMemBankSizeFourByte  = 0x1,
    cudaSharedMemBankSizeEightByte = 0x2
};


/* Shared memory carveout presets: default is -1, "max shared" is 100 and
 * "max L1" is 0. Values unchanged. */
enum __attribute__((device_builtin)) cudaSharedCarveout
{
    cudaSharedmemCarveoutDefault   = -1,
    cudaSharedmemCarveoutMaxShared = 100,
    cudaSharedmemCarveoutMaxL1     = 0
};


/* Compute mode; enumerators and values unchanged (0..3). */
enum __attribute__((device_builtin)) cudaComputeMode {
    cudaComputeModeDefault          = 0x0,
    cudaComputeModeExclusive        = 0x1,
    cudaComputeModeProhibited       = 0x2,
    cudaComputeModeExclusiveProcess = 0x3
};


/* Limit identifiers, numbered contiguously 0..6. Values unchanged. */
enum __attribute__((device_builtin)) cudaLimit {
    cudaLimitStackSize                    = 0,
    cudaLimitPrintfFifoSize               = 1,
    cudaLimitMallocHeapSize               = 2,
    cudaLimitDevRuntimeSyncDepth          = 3,
    cudaLimitDevRuntimePendingLaunchCount = 4,
    cudaLimitMaxL2FetchGranularity        = 5,
    cudaLimitPersistingL2CacheSize        = 6
};


/* Memory advice identifiers, arranged as set/unset pairs numbered 1..6.
 * Values unchanged. */
enum __attribute__((device_builtin)) cudaMemoryAdvise {
    cudaMemAdviseSetReadMostly          = 0x1,
    cudaMemAdviseUnsetReadMostly        = 0x2,
    cudaMemAdviseSetPreferredLocation   = 0x3,
    cudaMemAdviseUnsetPreferredLocation = 0x4,
    cudaMemAdviseSetAccessedBy          = 0x5,
    cudaMemAdviseUnsetAccessedBy        = 0x6
};


/* Memory range attribute identifiers, numbered 1..4. Values unchanged. */
enum __attribute__((device_builtin)) cudaMemRangeAttribute {
    cudaMemRangeAttributeReadMostly           = 0x1,
    cudaMemRangeAttributePreferredLocation    = 0x2,
    cudaMemRangeAttributeAccessedBy           = 0x3,
    cudaMemRangeAttributeLastPrefetchLocation = 0x4
};


/* Output mode: key/value pair is 0, CSV is 1. Values unchanged. */
enum __attribute__((device_builtin)) cudaOutputMode {
    cudaKeyValuePair = 0,
    cudaCSV          = 1
};


/* GPUDirect RDMA flush-writes option bits: bit 0 is Host, bit 1 is MemOps. */
enum __attribute__((device_builtin)) cudaFlushGPUDirectRDMAWritesOptions
{
    cudaFlushGPUDirectRDMAWritesOptionHost   = 0x1,
    cudaFlushGPUDirectRDMAWritesOptionMemOps = 0x2
};


/* GPUDirect RDMA writes ordering levels: 0, 100 and 200. Values unchanged. */
enum __attribute__((device_builtin)) cudaGPUDirectRDMAWritesOrdering
{
    cudaGPUDirectRDMAWritesOrderingNone       = 0,
    cudaGPUDirectRDMAWritesOrderingOwner      = 100,
    cudaGPUDirectRDMAWritesOrderingAllDevices = 200
};


/* GPUDirect RDMA flush scope. The values (100, 200) match the Owner and
 * AllDevices values of cudaGPUDirectRDMAWritesOrdering. */
enum __attribute__((device_builtin)) cudaFlushGPUDirectRDMAWritesScope
{
    cudaFlushGPUDirectRDMAWritesToOwner      = 100,
    cudaFlushGPUDirectRDMAWritesToAllDevices = 200
};


/* GPUDirect RDMA flush target. The single enumerator has no initializer and
 * therefore has value 0. */
enum __attribute__((device_builtin)) cudaFlushGPUDirectRDMAWritesTarget
{
    cudaFlushGPUDirectRDMAWritesTargetCurrentDevice
};


/* Device attribute identifiers.
 *
 * Numbering is explicit and not contiguous: 44, 102-105, 107, 110 and 120
 * are not assigned in this enum. Two enumerators share the value 114
 * (cudaDevAttrTimelineSemaphoreInteropSupported and
 * cudaDevAttrMaxTimelineSemaphoreInteropSupported). cudaDevAttrMax has no
 * initializer, so it takes the value after the last explicit one (122). */
enum __attribute__((device_builtin)) cudaDeviceAttr
{
    cudaDevAttrMaxThreadsPerBlock = 1,
    cudaDevAttrMaxBlockDimX = 2,
    cudaDevAttrMaxBlockDimY = 3,
    cudaDevAttrMaxBlockDimZ = 4,
    cudaDevAttrMaxGridDimX = 5,
    cudaDevAttrMaxGridDimY = 6,
    cudaDevAttrMaxGridDimZ = 7,
    cudaDevAttrMaxSharedMemoryPerBlock = 8,
    cudaDevAttrTotalConstantMemory = 9,
    cudaDevAttrWarpSize = 10,
    cudaDevAttrMaxPitch = 11,
    cudaDevAttrMaxRegistersPerBlock = 12,
    cudaDevAttrClockRate = 13,
    cudaDevAttrTextureAlignment = 14,
    cudaDevAttrGpuOverlap = 15,
    cudaDevAttrMultiProcessorCount = 16,
    cudaDevAttrKernelExecTimeout = 17,
    cudaDevAttrIntegrated = 18,
    cudaDevAttrCanMapHostMemory = 19,
    cudaDevAttrComputeMode = 20,
    cudaDevAttrMaxTexture1DWidth = 21,
    cudaDevAttrMaxTexture2DWidth = 22,
    cudaDevAttrMaxTexture2DHeight = 23,
    cudaDevAttrMaxTexture3DWidth = 24,
    cudaDevAttrMaxTexture3DHeight = 25,
    cudaDevAttrMaxTexture3DDepth = 26,
    cudaDevAttrMaxTexture2DLayeredWidth = 27,
    cudaDevAttrMaxTexture2DLayeredHeight = 28,
    cudaDevAttrMaxTexture2DLayeredLayers = 29,
    cudaDevAttrSurfaceAlignment = 30,
    cudaDevAttrConcurrentKernels = 31,
    cudaDevAttrEccEnabled = 32,
    cudaDevAttrPciBusId = 33,
    cudaDevAttrPciDeviceId = 34,
    cudaDevAttrTccDriver = 35,
    cudaDevAttrMemoryClockRate = 36,
    cudaDevAttrGlobalMemoryBusWidth = 37,
    cudaDevAttrL2CacheSize = 38,
    cudaDevAttrMaxThreadsPerMultiProcessor = 39,
    cudaDevAttrAsyncEngineCount = 40,
    cudaDevAttrUnifiedAddressing = 41,
    cudaDevAttrMaxTexture1DLayeredWidth = 42,
    cudaDevAttrMaxTexture1DLayeredLayers = 43,
    /* 44 is not assigned */
    cudaDevAttrMaxTexture2DGatherWidth = 45,
    cudaDevAttrMaxTexture2DGatherHeight = 46,
    cudaDevAttrMaxTexture3DWidthAlt = 47,
    cudaDevAttrMaxTexture3DHeightAlt = 48,
    cudaDevAttrMaxTexture3DDepthAlt = 49,
    cudaDevAttrPciDomainId = 50,
    cudaDevAttrTexturePitchAlignment = 51,
    cudaDevAttrMaxTextureCubemapWidth = 52,
    cudaDevAttrMaxTextureCubemapLayeredWidth = 53,
    cudaDevAttrMaxTextureCubemapLayeredLayers = 54,
    cudaDevAttrMaxSurface1DWidth = 55,
    cudaDevAttrMaxSurface2DWidth = 56,
    cudaDevAttrMaxSurface2DHeight = 57,
    cudaDevAttrMaxSurface3DWidth = 58,
    cudaDevAttrMaxSurface3DHeight = 59,
    cudaDevAttrMaxSurface3DDepth = 60,
    cudaDevAttrMaxSurface1DLayeredWidth = 61,
    cudaDevAttrMaxSurface1DLayeredLayers = 62,
    cudaDevAttrMaxSurface2DLayeredWidth = 63,
    cudaDevAttrMaxSurface2DLayeredHeight = 64,
    cudaDevAttrMaxSurface2DLayeredLayers = 65,
    cudaDevAttrMaxSurfaceCubemapWidth = 66,
    cudaDevAttrMaxSurfaceCubemapLayeredWidth = 67,
    cudaDevAttrMaxSurfaceCubemapLayeredLayers = 68,
    cudaDevAttrMaxTexture1DLinearWidth = 69,
    cudaDevAttrMaxTexture2DLinearWidth = 70,
    cudaDevAttrMaxTexture2DLinearHeight = 71,
    cudaDevAttrMaxTexture2DLinearPitch = 72,
    cudaDevAttrMaxTexture2DMipmappedWidth = 73,
    cudaDevAttrMaxTexture2DMipmappedHeight = 74,
    cudaDevAttrComputeCapabilityMajor = 75,
    cudaDevAttrComputeCapabilityMinor = 76,
    cudaDevAttrMaxTexture1DMipmappedWidth = 77,
    cudaDevAttrStreamPrioritiesSupported = 78,
    cudaDevAttrGlobalL1CacheSupported = 79,
    cudaDevAttrLocalL1CacheSupported = 80,
    cudaDevAttrMaxSharedMemoryPerMultiprocessor = 81,
    cudaDevAttrMaxRegistersPerMultiprocessor = 82,
    cudaDevAttrManagedMemory = 83,
    cudaDevAttrIsMultiGpuBoard = 84,
    cudaDevAttrMultiGpuBoardGroupID = 85,
    cudaDevAttrHostNativeAtomicSupported = 86,
    cudaDevAttrSingleToDoublePrecisionPerfRatio = 87,
    cudaDevAttrPageableMemoryAccess = 88,
    cudaDevAttrConcurrentManagedAccess = 89,
    cudaDevAttrComputePreemptionSupported = 90,
    cudaDevAttrCanUseHostPointerForRegisteredMem = 91,
    cudaDevAttrReserved92 = 92,
    cudaDevAttrReserved93 = 93,
    cudaDevAttrReserved94 = 94,
    cudaDevAttrCooperativeLaunch = 95,
    cudaDevAttrCooperativeMultiDeviceLaunch = 96,
    cudaDevAttrMaxSharedMemoryPerBlockOptin = 97,
    cudaDevAttrCanFlushRemoteWrites = 98,
    cudaDevAttrHostRegisterSupported = 99,
    cudaDevAttrPageableMemoryAccessUsesHostPageTables = 100,
    cudaDevAttrDirectManagedMemAccessFromHost = 101,
    /* 102-105 are not assigned */
    cudaDevAttrMaxBlocksPerMultiprocessor = 106,
    /* 107 is not assigned */
    cudaDevAttrMaxPersistingL2CacheSize = 108,
    cudaDevAttrMaxAccessPolicyWindowSize = 109,
    /* 110 is not assigned */
    cudaDevAttrReservedSharedMemoryPerBlock = 111,
    cudaDevAttrSparseCudaArraySupported = 112,
    cudaDevAttrHostRegisterReadOnlySupported = 113,
    /* the next two enumerators intentionally share the value 114 */
    cudaDevAttrTimelineSemaphoreInteropSupported = 114,
    cudaDevAttrMaxTimelineSemaphoreInteropSupported = 114,
    cudaDevAttrMemoryPoolsSupported = 115,
    cudaDevAttrGPUDirectRDMASupported = 116,
    cudaDevAttrGPUDirectRDMAFlushWritesOptions = 117,
    cudaDevAttrGPUDirectRDMAWritesOrdering = 118,
    cudaDevAttrMemoryPoolSupportedHandleTypes = 119,


    /* 120 is not assigned */
    cudaDevAttrDeferredMappingCudaArraySupported = 121,

    /* implicit value: 122 */
    cudaDevAttrMax
};


/* Memory pool attribute identifiers, numbered 0x1..0x8. The first three are
 * "reuse" settings, followed by the release threshold and four reserved/used
 * memory counters. The embedded linemarkers are preprocessor output and are
 * kept verbatim. */
enum __attribute__((device_builtin)) cudaMemPoolAttr
{
# 1973 "/usr/local/cuda-11.7/include/driver_types.h" 3
    cudaMemPoolReuseFollowEventDependencies = 0x1,


    cudaMemPoolReuseAllowOpportunistic = 0x2,


    cudaMemPoolReuseAllowInternalDependencies = 0x3,
# 1999 "/usr/local/cuda-11.7/include/driver_types.h" 3
    cudaMemPoolAttrReleaseThreshold = 0x4,


    cudaMemPoolAttrReservedMemCurrent = 0x5,


    cudaMemPoolAttrReservedMemHigh = 0x6,


    cudaMemPoolAttrUsedMemCurrent = 0x7,


    cudaMemPoolAttrUsedMemHigh = 0x8
};


/* Memory location type: invalid is 0, device is 1. Values unchanged. */
enum __attribute__((device_builtin)) cudaMemLocationType
{
    cudaMemLocationTypeInvalid = 0x0,
    cudaMemLocationTypeDevice  = 0x1
};


/* Memory location: a location type tag plus an integer identifier. */
struct __attribute__((device_builtin)) cudaMemLocation {
    enum cudaMemLocationType type;  /* location type */
    int id;                         /* presumably the device ordinal when type is Device - confirm */
};


/* Memory access protection flags: none is 0, read is 1, read/write is 3.
 * The value 2 is not used. Values unchanged. */
enum __attribute__((device_builtin)) cudaMemAccessFlags
{
    cudaMemAccessFlagsProtNone      = 0x0,
    cudaMemAccessFlagsProtRead      = 0x1,
    cudaMemAccessFlagsProtReadWrite = 0x3
};


/* Memory access descriptor: pairs a memory location with access protection flags. */
struct __attribute__((device_builtin)) cudaMemAccessDesc {
    struct cudaMemLocation location;  /* location the flags apply to */
    enum cudaMemAccessFlags flags;    /* access protection */
};


/* Memory allocation type. The Max enumerator is 0x7fffffff (largest positive
 * 32-bit signed value). Values unchanged. */
enum __attribute__((device_builtin)) cudaMemAllocationType
{
    cudaMemAllocationTypeInvalid = 0,
    cudaMemAllocationTypePinned  = 1,
    cudaMemAllocationTypeMax     = 0x7fffffff
};


/* Memory allocation handle types. Non-zero values are distinct single bits
 * (1, 2, 4); values unchanged. */
enum __attribute__((device_builtin)) cudaMemAllocationHandleType
{
    cudaMemHandleTypeNone                = 0,
    cudaMemHandleTypePosixFileDescriptor = 1,
    cudaMemHandleTypeWin32               = 2,
    cudaMemHandleTypeWin32Kmt            = 4
};


/* Memory pool properties: allocation type, handle types, location, a
 * Windows security attributes pointer and 64 reserved bytes. */
struct __attribute__((device_builtin)) cudaMemPoolProps {
    enum cudaMemAllocationType allocType;          /* allocation type */
    enum cudaMemAllocationHandleType handleTypes;  /* handle type(s) */
    struct cudaMemLocation location;               /* pool location */


    /* presumably only meaningful with a Win32 handle type - confirm against the CUDA docs */
    void *win32SecurityAttributes;
    unsigned char reserved[64];                    /* reserved bytes */
};


/* Export data for a memory pool pointer: an opaque 64-byte blob. */
struct __attribute__((device_builtin)) cudaMemPoolPtrExportData {
    unsigned char reserved[64];
};


/* Parameters for a memory allocation node: pool properties, an array of
 * access descriptors with its element count, a byte size and a pointer. */
struct __attribute__((device_builtin)) cudaMemAllocNodeParams {


    struct cudaMemPoolProps poolProps;             /* pool properties */
    const struct cudaMemAccessDesc *accessDescs;   /* access descriptor array */
    size_t accessDescCount;                        /* number of entries in accessDescs (per the field name) */
    size_t bytesize;                               /* allocation size in bytes (per the field name) */
    void *dptr;                                    /* presumably receives the allocated address - confirm */
};


/* Graph memory attribute identifiers: current/high values for used and
 * reserved memory, numbered 0..3. Values unchanged. */
enum __attribute__((device_builtin)) cudaGraphMemAttributeType
{
    cudaGraphMemAttrUsedMemCurrent     = 0,
    cudaGraphMemAttrUsedMemHigh        = 1,
    cudaGraphMemAttrReservedMemCurrent = 2,
    cudaGraphMemAttrReservedMemHigh    = 3
};


/* Peer-to-peer device attribute identifiers, numbered 1..4. Values unchanged. */
enum __attribute__((device_builtin)) cudaDeviceP2PAttr
{
    cudaDevP2PAttrPerformanceRank          = 0x1,
    cudaDevP2PAttrAccessSupported          = 0x2,
    cudaDevP2PAttrNativeAtomicSupported    = 0x3,
    cudaDevP2PAttrCudaArrayAccessSupported = 0x4
};
# 2177 "/usr/local/cuda-11.7/include/driver_types.h" 3
/* Alias for struct CUuuid_st, which is defined outside this section; it is
 * used by value as the `uuid` member of cudaDeviceProp. */
typedef __attribute__((device_builtin)) struct CUuuid_st cudaUUID_t;


/* Device properties record.
 *
 * A flat record of identification, capacity, limit and capability fields.
 * Field order and types are part of the binary layout, so nothing here may
 * be reordered or resized. Array-valued limits hold one entry per dimension
 * (e.g. maxThreadsDim[3], maxGridSize[3]). Units and exact meanings of the
 * individual fields are not visible in this file; consult the CUDA Runtime
 * API reference. */
struct __attribute__((device_builtin)) cudaDeviceProp
{
    /* identification */
    char name[256];
    cudaUUID_t uuid;
    char luid[8];
    unsigned int luidDeviceNodeMask;
    /* memory sizes and basic execution limits */
    size_t totalGlobalMem;
    size_t sharedMemPerBlock;
    int regsPerBlock;
    int warpSize;
    size_t memPitch;
    int maxThreadsPerBlock;
    int maxThreadsDim[3];
    int maxGridSize[3];
    int clockRate;
    size_t totalConstMem;
    /* two-part version number (major.minor) */
    int major;
    int minor;
    size_t textureAlignment;
    size_t texturePitchAlignment;
    int deviceOverlap;
    int multiProcessorCount;
    int kernelExecTimeoutEnabled;
    int integrated;
    int canMapHostMemory;
    int computeMode;
    /* texture size limits */
    int maxTexture1D;
    int maxTexture1DMipmap;
    int maxTexture1DLinear;
    int maxTexture2D[2];
    int maxTexture2DMipmap[2];
    int maxTexture2DLinear[3];
    int maxTexture2DGather[2];
    int maxTexture3D[3];
    int maxTexture3DAlt[3];
    int maxTextureCubemap;
    int maxTexture1DLayered[2];
    int maxTexture2DLayered[3];
    int maxTextureCubemapLayered[2];
    /* surface size limits */
    int maxSurface1D;
    int maxSurface2D[2];
    int maxSurface3D[3];
    int maxSurface1DLayered[2];
    int maxSurface2DLayered[3];
    int maxSurfaceCubemap;
    int maxSurfaceCubemapLayered[2];
    size_t surfaceAlignment;
    int concurrentKernels;
    int ECCEnabled;
    /* PCI identification */
    int pciBusID;
    int pciDeviceID;
    int pciDomainID;
    int tccDriver;
    int asyncEngineCount;
    int unifiedAddressing;
    int memoryClockRate;
    int memoryBusWidth;
    int l2CacheSize;
    int persistingL2CacheMaxSize;
    int maxThreadsPerMultiProcessor;
    int streamPrioritiesSupported;
    int globalL1CacheSupported;
    int localL1CacheSupported;
    size_t sharedMemPerMultiprocessor;
    int regsPerMultiprocessor;
    int managedMemory;
    int isMultiGpuBoard;
    int multiGpuBoardGroupID;
    int hostNativeAtomicSupported;
    int singleToDoublePrecisionPerfRatio;
    int pageableMemoryAccess;
    int concurrentManagedAccess;
    int computePreemptionSupported;
    int canUseHostPointerForRegisteredMem;
    int cooperativeLaunch;
    int cooperativeMultiDeviceLaunch;
    size_t sharedMemPerBlockOptin;
    int pageableMemoryAccessUsesHostPageTables;
    int directManagedMemAccessFromHost;
    int maxBlocksPerMultiProcessor;
    int accessPolicyMaxWindowSize;
    size_t reservedSharedMemPerBlock;
};
# 2362 "/usr/local/cuda-11.7/include/driver_types.h" 3
/* IPC event handle: an opaque 64-byte blob. */
typedef __attribute__((device_builtin)) struct __attribute__((device_builtin)) cudaIpcEventHandle_st
{
    char reserved[64];
}cudaIpcEventHandle_t;


/* IPC memory handle: an opaque 64-byte blob. */
typedef __attribute__((device_builtin)) struct __attribute__((device_builtin)) cudaIpcMemHandle_st
{
    char reserved[64];
}cudaIpcMemHandle_t;


/* External memory handle types, numbered 1..8. Values unchanged. */
enum __attribute__((device_builtin)) cudaExternalMemoryHandleType
{
    cudaExternalMemoryHandleTypeOpaqueFd         = 0x1,
    cudaExternalMemoryHandleTypeOpaqueWin32      = 0x2,
    cudaExternalMemoryHandleTypeOpaqueWin32Kmt   = 0x3,
    cudaExternalMemoryHandleTypeD3D12Heap        = 0x4,
    cudaExternalMemoryHandleTypeD3D12Resource    = 0x5,
    cudaExternalMemoryHandleTypeD3D11Resource    = 0x6,
    cudaExternalMemoryHandleTypeD3D11ResourceKmt = 0x7,
    cudaExternalMemoryHandleTypeNvSciBuf         = 0x8
};
# 2453 "/usr/local/cuda-11.7/include/driver_types.h" 3
/* External memory handle descriptor: a handle type tag, a union holding the
 * handle in one of three forms (file descriptor, win32 handle/name pair,
 * NvSciBuf object pointer), then a size and flags. Presumably `type` selects
 * the active union member - confirm against the CUDA docs. The embedded
 * linemarker is preprocessor output and is kept verbatim. */
struct __attribute__((device_builtin)) cudaExternalMemoryHandleDesc {


    /* handle type tag */
    enum cudaExternalMemoryHandleType type;
    union {


        /* file descriptor form */
        int fd;
# 2480 "/usr/local/cuda-11.7/include/driver_types.h" 3
        /* win32 form: a handle and a name pointer */
        struct {


            void *handle;


            const void *name;
        } win32;


        /* NvSciBuf object form */
        const void *nvSciBufObject;
    } handle;


    /* size; presumably in bytes - confirm */
    unsigned long long size;


    unsigned int flags;
};


/* External memory buffer descriptor: an offset and a size (both unsigned
 * long long) followed by flags. Field order is unchanged. */
struct __attribute__((device_builtin)) cudaExternalMemoryBufferDesc
{
    unsigned long long offset, size;
    unsigned int flags;
};


/* External memory mipmapped array descriptor: an offset, a channel format,
 * an extent, flags and a level count. */
struct __attribute__((device_builtin)) cudaExternalMemoryMipmappedArrayDesc {


    /* offset; presumably into the external memory object - confirm */
    unsigned long long offset;


    /* channel format */
    struct cudaChannelFormatDesc formatDesc;


    /* array extent */
    struct cudaExtent extent;


    unsigned int flags;


    /* number of mipmap levels (per the field name) */
    unsigned int numLevels;
};


/* External semaphore handle types, numbered 1..10. Values unchanged. */
enum __attribute__((device_builtin)) cudaExternalSemaphoreHandleType
{
    cudaExternalSemaphoreHandleTypeOpaqueFd               = 0x1,
    cudaExternalSemaphoreHandleTypeOpaqueWin32            = 0x2,
    cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt         = 0x3,
    cudaExternalSemaphoreHandleTypeD3D12Fence             = 0x4,
    cudaExternalSemaphoreHandleTypeD3D11Fence             = 0x5,
    cudaExternalSemaphoreHandleTypeNvSciSync              = 0x6,
    cudaExternalSemaphoreHandleTypeKeyedMutex             = 0x7,
    cudaExternalSemaphoreHandleTypeKeyedMutexKmt          = 0x8,
    cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd    = 0x9,
    cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32 = 0xa
};


/* External semaphore handle descriptor: a handle type tag, a union holding
 * the handle in one of three forms (file descriptor, win32 handle/name pair,
 * NvSciSync object pointer), then flags. Presumably `type` selects the
 * active union member - confirm against the CUDA docs. The embedded
 * linemarker is preprocessor output and is kept verbatim. */
struct __attribute__((device_builtin)) cudaExternalSemaphoreHandleDesc {


    /* handle type tag */
    enum cudaExternalSemaphoreHandleType type;
    union {


        /* file descriptor form */
        int fd;
# 2630 "/usr/local/cuda-11.7/include/driver_types.h" 3
        /* win32 form: a handle and a name pointer */
        struct {


            void *handle;


            const void *name;
        } win32;


        /* NvSciSync object form */
        const void* nvSciSyncObj;
    } handle;


    unsigned int flags;
};


/* External semaphore signal parameters, "_v1" layout. `params` holds three
 * sub-records side by side (fence, nvSciSync, keyedMutex) with no reserved
 * padding; `flags` follows. The embedded linemarker is preprocessor output
 * and is kept verbatim. */
struct __attribute__((device_builtin)) cudaExternalSemaphoreSignalParams_v1 {
    struct {


        /* fence parameters: a single 64-bit value */
        struct {


            unsigned long long value;
        } fence;
        /* NvSciSync parameters: a fence pointer overlaid on a 64-bit reserved word */
        union {


            void *fence;
            unsigned long long reserved;
        } nvSciSync;


        /* keyed mutex parameters: a single 64-bit key */
        struct {


            unsigned long long key;
        } keyedMutex;
    } params;
# 2694 "/usr/local/cuda-11.7/include/driver_types.h" 3
    unsigned int flags;
};


/* External semaphore wait parameters, "_v1" layout. Same shape as the
 * signal "_v1" record, except that keyedMutex also carries a timeout. The
 * embedded linemarker is preprocessor output and is kept verbatim. */
struct __attribute__((device_builtin)) cudaExternalSemaphoreWaitParams_v1 {
    struct {


        /* fence parameters: a single 64-bit value */
        struct {


            unsigned long long value;
        } fence;
        /* NvSciSync parameters: a fence pointer overlaid on a 64-bit reserved word */
        union {


            void *fence;
            unsigned long long reserved;
        } nvSciSync;


        /* keyed mutex parameters: a 64-bit key and a timeout */
        struct {


            unsigned long long key;


            /* timeout in milliseconds (per the field name) */
            unsigned int timeoutMs;
        } keyedMutex;
    } params;
# 2743 "/usr/local/cuda-11.7/include/driver_types.h" 3
    unsigned int flags;
};


/* External semaphore signal parameters. Same sub-records as the "_v1"
 * layout, plus 12 reserved words inside `params` and 16 reserved words
 * after `flags`. The embedded linemarker is preprocessor output and is kept
 * verbatim. */
struct __attribute__((device_builtin)) cudaExternalSemaphoreSignalParams{
    struct {


        /* fence parameters: a single 64-bit value */
        struct {


            unsigned long long value;
        } fence;
        /* NvSciSync parameters: a fence pointer overlaid on a 64-bit reserved word */
        union {


            void *fence;
            unsigned long long reserved;
        } nvSciSync;


        /* keyed mutex parameters: a single 64-bit key */
        struct {


            unsigned long long key;
        } keyedMutex;
        /* reserved words */
        unsigned int reserved[12];
    } params;
# 2789 "/usr/local/cuda-11.7/include/driver_types.h" 3
    unsigned int flags;
    /* reserved words */
    unsigned int reserved[16];
};


/* External semaphore wait parameters. Same sub-records as the wait "_v1"
 * layout, plus 10 reserved words inside `params` (the signal variant has 12)
 * and 16 reserved words after `flags`. The embedded linemarker is
 * preprocessor output and is kept verbatim. */
struct __attribute__((device_builtin)) cudaExternalSemaphoreWaitParams {
    struct {


        /* fence parameters: a single 64-bit value */
        struct {


            unsigned long long value;
        } fence;
        /* NvSciSync parameters: a fence pointer overlaid on a 64-bit reserved word */
        union {


            void *fence;
            unsigned long long reserved;
        } nvSciSync;


        /* keyed mutex parameters: a 64-bit key and a timeout */
        struct {


            unsigned long long key;


            /* timeout in milliseconds (per the field name) */
            unsigned int timeoutMs;
        } keyedMutex;
        /* reserved words */
        unsigned int reserved[10];
    } params;
# 2840 "/usr/local/cuda-11.7/include/driver_types.h" 3
    unsigned int flags;
    /* reserved words */
    unsigned int reserved[16];
};
# 2853 "/usr/local/cuda-11.7/include/driver_types.h" 3
/* Alias for enum cudaError, which is defined outside this section. */
typedef __attribute__((device_builtin)) enum cudaError cudaError_t;


/* The handle types below are pointers to CU*_st structs (or to
 * cudaGraphicsResource) that have no definition in this section, so they
 * are opaque here. */

/* Stream handle. */
typedef __attribute__((device_builtin)) struct CUstream_st *cudaStream_t;


/* Event handle. */
typedef __attribute__((device_builtin)) struct CUevent_st *cudaEvent_t;


/* Graphics resource handle. */
typedef __attribute__((device_builtin)) struct cudaGraphicsResource *cudaGraphicsResource_t;


/* Alias for enum cudaOutputMode. */
typedef __attribute__((device_builtin)) enum cudaOutputMode cudaOutputMode_t;


/* External memory handle. */
typedef __attribute__((device_builtin)) struct CUexternalMemory_st *cudaExternalMemory_t;


/* External semaphore handle. */
typedef __attribute__((device_builtin)) struct CUexternalSemaphore_st *cudaExternalSemaphore_t;


/* Graph handle. */
typedef __attribute__((device_builtin)) struct CUgraph_st *cudaGraph_t;


/* Graph node handle. */
typedef __attribute__((device_builtin)) struct CUgraphNode_st *cudaGraphNode_t;


/* User object handle. */
typedef __attribute__((device_builtin)) struct CUuserObject_st *cudaUserObject_t;


/* Function handle. */
typedef __attribute__((device_builtin)) struct CUfunc_st *cudaFunction_t;


/* Memory pool handle. */
typedef __attribute__((device_builtin)) struct CUmemPoolHandle_st *cudaMemPool_t;


/* Cooperative-group scope ("CG" per the enumerator names): invalid is 0,
 * grid is 1, multi-grid is 2. Values unchanged. */
enum __attribute__((device_builtin)) cudaCGScope
{
    cudaCGScopeInvalid   = 0x0,
    cudaCGScopeGrid      = 0x1,
    cudaCGScopeMultiGrid = 0x2
};


/* Kernel launch parameters: function pointer, grid and block dimensions,
 * argument pointer array, shared memory size and stream. */
struct __attribute__((device_builtin)) cudaLaunchParams
{
    void *func;           /* function to launch */
    dim3 gridDim;         /* grid dimensions */
    dim3 blockDim;        /* block dimensions */
    void **args;          /* array of argument pointers */
    size_t sharedMem;     /* shared memory size; presumably dynamic bytes per block - confirm */
    cudaStream_t stream;  /* stream handle */
};


/* Kernel node parameters: function pointer, grid and block dimensions,
 * shared memory byte count, and two pointer arrays (kernelParams, extra).
 * Unlike cudaLaunchParams, the shared memory size here is an unsigned int
 * and there is no stream member. */
struct __attribute__((device_builtin)) cudaKernelNodeParams {
    void* func;                   /* function to launch */
    dim3 gridDim;                 /* grid dimensions */
    dim3 blockDim;                /* block dimensions */
    unsigned int sharedMemBytes;  /* shared memory size in bytes (per the field name) */
    void **kernelParams;          /* array of kernel parameter pointers */
    void **extra;                 /* additional launch options; format not shown here - see the CUDA docs */
};


/* External semaphore signal node parameters: two parallel arrays
 * (semaphores and their signal parameters) and a count. */
struct __attribute__((device_builtin)) cudaExternalSemaphoreSignalNodeParams {
    cudaExternalSemaphore_t* extSemArray;                            /* semaphore handles */
    const struct cudaExternalSemaphoreSignalParams* paramsArray;     /* signal parameters */
    unsigned int numExtSems;                                         /* presumably the length of both arrays - confirm */
};


/* External semaphore wait node parameters: two parallel arrays (semaphores
 * and their wait parameters) and a count. */
struct __attribute__((device_builtin)) cudaExternalSemaphoreWaitNodeParams {
    cudaExternalSemaphore_t* extSemArray;                          /* semaphore handles */
    const struct cudaExternalSemaphoreWaitParams* paramsArray;     /* wait parameters */
    unsigned int numExtSems;                                       /* presumably the length of both arrays - confirm */
};


/* Graph node types, numbered contiguously 0..11. cudaGraphNodeTypeCount has
 * no initializer and therefore equals 12. */
enum __attribute__((device_builtin)) cudaGraphNodeType
{
    cudaGraphNodeTypeKernel             = 0,
    cudaGraphNodeTypeMemcpy             = 1,
    cudaGraphNodeTypeMemset             = 2,
    cudaGraphNodeTypeHost               = 3,
    cudaGraphNodeTypeGraph              = 4,
    cudaGraphNodeTypeEmpty              = 5,
    cudaGraphNodeTypeWaitEvent          = 6,
    cudaGraphNodeTypeEventRecord        = 7,
    cudaGraphNodeTypeExtSemaphoreSignal = 8,
    cudaGraphNodeTypeExtSemaphoreWait   = 9,
    cudaGraphNodeTypeMemAlloc           = 10,
    cudaGraphNodeTypeMemFree            = 11,
    cudaGraphNodeTypeCount
};


/* Pointer handle to struct CUgraphExec_st, which has no definition in this section. */
typedef struct CUgraphExec_st* cudaGraphExec_t;


/* Result codes for a graph exec update: success is 0, followed by error
 * categories 1..8. Values unchanged. */
enum __attribute__((device_builtin)) cudaGraphExecUpdateResult
{
    cudaGraphExecUpdateSuccess                        = 0,
    cudaGraphExecUpdateError                          = 1,
    cudaGraphExecUpdateErrorTopologyChanged           = 2,
    cudaGraphExecUpdateErrorNodeTypeChanged           = 3,
    cudaGraphExecUpdateErrorFunctionChanged           = 4,
    cudaGraphExecUpdateErrorParametersChanged         = 5,
    cudaGraphExecUpdateErrorNotSupported              = 6,
    cudaGraphExecUpdateErrorUnsupportedFunctionChange = 7,
    cudaGraphExecUpdateErrorAttributesChanged         = 8
};


/* Driver entry point lookup flags; enumerators and values unchanged (0..2). */
enum __attribute__((device_builtin)) cudaGetDriverEntryPointFlags
{
    cudaEnableDefault                = 0,
    cudaEnableLegacyStream           = 1,
    cudaEnablePerThreadDefaultStream = 2
};


/* Graph debug DOT output flags. Each enumerator is a single bit; bit 1
 * (0x002) is not assigned. Values are the same as before, written as hex
 * masks instead of shifts. */
enum __attribute__((device_builtin)) cudaGraphDebugDotFlags
{
    cudaGraphDebugDotFlagsVerbose                  = 0x001,  /* bit 0 */
    cudaGraphDebugDotFlagsKernelNodeParams         = 0x004,  /* bit 2 */
    cudaGraphDebugDotFlagsMemcpyNodeParams         = 0x008,  /* bit 3 */
    cudaGraphDebugDotFlagsMemsetNodeParams         = 0x010,  /* bit 4 */
    cudaGraphDebugDotFlagsHostNodeParams           = 0x020,  /* bit 5 */
    cudaGraphDebugDotFlagsEventNodeParams          = 0x040,  /* bit 6 */
    cudaGraphDebugDotFlagsExtSemasSignalNodeParams = 0x080,  /* bit 7 */
    cudaGraphDebugDotFlagsExtSemasWaitNodeParams   = 0x100,  /* bit 8 */
    cudaGraphDebugDotFlagsKernelNodeAttributes     = 0x200,  /* bit 9 */
    cudaGraphDebugDotFlagsHandles                  = 0x400   /* bit 10 */
};


/* Graph instantiation flags: two single-bit values (1 and 8). Values unchanged. */
enum __attribute__((device_builtin)) cudaGraphInstantiateFlags
{
    cudaGraphInstantiateFlagAutoFreeOnLaunch = 0x1,
    cudaGraphInstantiateFlagUseNodePriority  = 0x8
};
# 3126 "/usr/local/cuda-11.7/include/driver_types.h" 3
/* Stream attribute identifiers (1 and 3; 2 is not assigned). The enumerator
 * names correspond to the members of union cudaStreamAttrValue. */
typedef __attribute__((device_builtin)) enum cudaStreamAttrID
{
    cudaStreamAttributeAccessPolicyWindow    = 0x1,
    cudaStreamAttributeSynchronizationPolicy = 0x3
} cudaStreamAttrID;
# 3140 "/usr/local/cuda-11.7/include/driver_types.h" 3
/* Stream attribute value: a union with one member per cudaStreamAttrID
 * enumerator. Presumably the attribute ID passed alongside selects the
 * active member - confirm against the CUDA docs. */
typedef __attribute__((device_builtin)) union cudaStreamAttrValue {
    struct cudaAccessPolicyWindow accessPolicyWindow;  /* access policy window value */
    enum cudaSynchronizationPolicy syncPolicy;         /* synchronization policy value */
} cudaStreamAttrValue;
# 3152 "/usr/local/cuda-11.7/include/driver_types.h" 3
/* Kernel node attribute identifiers (1, 2 and 8). The enumerator names
 * correspond to the members of union cudaKernelNodeAttrValue. */
typedef __attribute__((device_builtin)) enum cudaKernelNodeAttrID
{
    cudaKernelNodeAttributeAccessPolicyWindow = 0x1,
    cudaKernelNodeAttributeCooperative        = 0x2,
    cudaKernelNodeAttributePriority           = 0x8
} cudaKernelNodeAttrID;
# 3170 "/usr/local/cuda-11.7/include/driver_types.h" 3
/* Kernel node attribute value: a union with one member per
 * cudaKernelNodeAttrID enumerator. Presumably the attribute ID passed
 * alongside selects the active member - confirm against the CUDA docs. */
typedef __attribute__((device_builtin)) union cudaKernelNodeAttrValue {
    struct cudaAccessPolicyWindow accessPolicyWindow;  /* access policy window value */
    int cooperative;                                   /* cooperative setting; presumably a boolean - confirm */

    int priority;                                      /* priority value */

} cudaKernelNodeAttrValue;
# 101 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 1 "/usr/local/cuda-11.7/include/host_config.h" 1 3
# 60 "/usr/local/cuda-11.7/include/host_config.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/host_config.h" 1 3
# 61 "/usr/local/cuda-11.7/include/host_config.h" 2 3
# 102 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 111 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/local/cuda-11.7/include/cuda_runtime.h" 1 3
# 61 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
#pragma GCC diagnostic push


#pragma GCC diagnostic ignored "-Wunused-function"
# 83 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/host_config.h" 1 3
# 84 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3


# 1 "/usr/local/cuda-11.7/include/builtin_types.h" 1 3
# 56 "/usr/local/cuda-11.7/include/builtin_types.h" 3
# 1 "/usr/local/cuda-11.7/include/device_types.h" 1 3
# 59 "/usr/local/cuda-11.7/include/device_types.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 60 "/usr/local/cuda-11.7/include/device_types.h" 2 3


/* Rounding mode selector. The values 0..3 are the ones the enumerators
 * previously received implicitly; they are now spelled out. */
enum __attribute__((device_builtin)) cudaRoundMode {
    cudaRoundNearest = 0,
    cudaRoundZero    = 1,
    cudaRoundPosInf  = 2,
    cudaRoundMinInf  = 3
};
# 57 "/usr/local/cuda-11.7/include/builtin_types.h" 2 3


# 1 "/usr/local/cuda-11.7/include/surface_types.h" 1 3
# 84 "/usr/local/cuda-11.7/include/surface_types.h" 3
/* Surface boundary mode; enumerators and values unchanged (0..2). */
enum __attribute__((device_builtin)) cudaSurfaceBoundaryMode {
    cudaBoundaryModeZero  = 0x0,
    cudaBoundaryModeClamp = 0x1,
    cudaBoundaryModeTrap  = 0x2
};


/* Surface format mode: forced is 0, auto is 1. Values unchanged. */
enum __attribute__((device_builtin)) cudaSurfaceFormatMode {
    cudaFormatModeForced = 0x0,
    cudaFormatModeAuto   = 0x1
};


/* Surface reference: holds only a channel format descriptor. */
struct __attribute__((device_builtin)) surfaceReference
{


    /* channel format descriptor */
    struct cudaChannelFormatDesc channelDesc;
};


/* Surface object handle, represented as an unsigned 64-bit integer value. */
typedef __attribute__((device_builtin)) unsigned long long cudaSurfaceObject_t;
# 63 "/usr/local/cuda-11.7/include/builtin_types.h" 2 3
# 1 "/usr/local/cuda-11.7/include/texture_types.h" 1 3
# 84 "/usr/local/cuda-11.7/include/texture_types.h" 3
/* Texture address mode; enumerators and values unchanged (0..3). */
enum __attribute__((device_builtin)) cudaTextureAddressMode {
    cudaAddressModeWrap   = 0x0,
    cudaAddressModeClamp  = 0x1,
    cudaAddressModeMirror = 0x2,
    cudaAddressModeBorder = 0x3
};


/* Texture filter mode: point is 0, linear is 1. Values unchanged. */
enum __attribute__((device_builtin)) cudaTextureFilterMode {
    cudaFilterModePoint  = 0x0,
    cudaFilterModeLinear = 0x1
};


/* Texture read mode: element type is 0, normalized float is 1. Values unchanged. */
enum __attribute__((device_builtin)) cudaTextureReadMode {
    cudaReadModeElementType     = 0x0,
    cudaReadModeNormalizedFloat = 0x1
};


// Texture reference record. Member order and types define the layout, so only
// comments are added here. Field notes that go beyond the declared type are
// inferred from the member names and should be confirmed against the CUDA docs.
struct __attribute__((device_builtin)) textureReference
{

    // Integer flag; by its name, selects normalized texture coordinates (TODO confirm).
    int normalized;

    // Filtering mode, one of the cudaTextureFilterMode enumerators.
    enum cudaTextureFilterMode filterMode;

    // Three addressing-mode entries — presumably one per texture dimension (TODO confirm).
    enum cudaTextureAddressMode addressMode[3];

    // Channel format descriptor; struct cudaChannelFormatDesc is declared elsewhere.
    struct cudaChannelFormatDesc channelDesc;

    // Integer flag; presumably controls sRGB handling on reads (TODO confirm).
    int sRGB;

    // Unsigned anisotropy limit; exact semantics are not visible here.
    unsigned int maxAnisotropy;

    // Second filter-mode member, by its name applied to mipmaps; same enum type as filterMode.
    enum cudaTextureFilterMode mipmapFilterMode;

    // Float bias; by its name, applied to the mipmap level (TODO confirm).
    float mipmapLevelBias;

    // Float; by its name, the lower clamp for the mipmap level (TODO confirm).
    float minMipmapLevelClamp;

    // Float; by its name, the upper clamp for the mipmap level (TODO confirm).
    float maxMipmapLevelClamp;

    // Integer flag; by its name, turns off a trilinear-filtering optimization (TODO confirm).
    int disableTrilinearOptimization;
    int __cudaReserved[14];  // 14 reserved ints; judging by the name, not for client use.
};


// Texture descriptor record. Member order and types define the layout, so only
// comments are added here. Field notes that go beyond the declared type are
// inferred from the member names and should be confirmed against the CUDA docs.
struct __attribute__((device_builtin)) cudaTextureDesc
{

    // Three addressing-mode entries — presumably one per texture dimension (TODO confirm).
    enum cudaTextureAddressMode addressMode[3];

    // Filtering mode, one of the cudaTextureFilterMode enumerators.
    enum cudaTextureFilterMode filterMode;

    // Read mode, one of the cudaTextureReadMode enumerators.
    enum cudaTextureReadMode readMode;

    // Integer flag; presumably controls sRGB handling on reads (TODO confirm).
    int sRGB;

    // Four floats; by the name, a border colour — component order not visible here.
    float borderColor[4];

    // Integer flag; by its name, selects normalized texture coordinates (TODO confirm).
    int normalizedCoords;

    // Unsigned anisotropy limit; exact semantics are not visible here.
    unsigned int maxAnisotropy;

    // Second filter-mode member, by its name applied to mipmaps; same enum type as filterMode.
    enum cudaTextureFilterMode mipmapFilterMode;

    // Float bias; by its name, applied to the mipmap level (TODO confirm).
    float mipmapLevelBias;

    // Float; by its name, the lower clamp for the mipmap level (TODO confirm).
    float minMipmapLevelClamp;

    // Float; by its name, the upper clamp for the mipmap level (TODO confirm).
    float maxMipmapLevelClamp;

    // Integer flag; by its name, turns off a trilinear-filtering optimization (TODO confirm).
    int disableTrilinearOptimization;

    // Integer flag; by its name, relates to seamless cubemap sampling (TODO confirm).
    int seamlessCubemap;
};


// Texture object handle type: an alias for unsigned long long.
typedef __attribute__((device_builtin)) unsigned long long cudaTextureObject_t;
# 64 "/usr/local/cuda-11.7/include/builtin_types.h" 2 3
# 92 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3
# 1 "/usr/local/cuda-11.7/include/library_types.h" 1 3
# 55 "/usr/local/cuda-11.7/include/library_types.h" 3
// Data-type tags from library_types.h, exposed both as `enum cudaDataType_t`
// and through the typedef `cudaDataType`.
// The 28 enumerators use each value in 0..27 exactly once, but they are listed
// grouped by name rather than in ascending numeric order, so the explicit
// values must not be replaced by implicit numbering.
// NOTE(review): the names appear to encode R/C = real/complex, the bit width,
// and F/BF/I/U = float/bfloat/signed/unsigned — confirm against the CUDA docs.
typedef enum cudaDataType_t
{
    CUDA_R_16F = 2,
    CUDA_C_16F = 6,
    CUDA_R_16BF = 14,
    CUDA_C_16BF = 15,
    CUDA_R_32F = 0,
    CUDA_C_32F = 4,
    CUDA_R_64F = 1,
    CUDA_C_64F = 5,
    CUDA_R_4I = 16,
    CUDA_C_4I = 17,
    CUDA_R_4U = 18,
    CUDA_C_4U = 19,
    CUDA_R_8I = 3,
    CUDA_C_8I = 7,
    CUDA_R_8U = 8,
    CUDA_C_8U = 9,
    CUDA_R_16I = 20,
    CUDA_C_16I = 21,
    CUDA_R_16U = 22,
    CUDA_C_16U = 23,
    CUDA_R_32I = 10,
    CUDA_C_32I = 11,
    CUDA_R_32U = 12,
    CUDA_C_32U = 13,
    CUDA_R_64I = 24,
    CUDA_C_64I = 25,
    CUDA_R_64U = 26,
    CUDA_C_64U = 27,


} cudaDataType;


// Selector for one component of a library version, exposed both as
// `enum libraryPropertyType_t` and through the typedef `libraryPropertyType`.
// The values are written out explicitly and equal the implicit numbering.
typedef enum libraryPropertyType_t
{
    MAJOR_VERSION = 0,
    MINOR_VERSION = 1,
    PATCH_LEVEL   = 2
} libraryPropertyType;
# 93 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3


# 1 "/usr/local/cuda-11.7/include/channel_descriptor.h" 1 3
# 61 "/usr/local/cuda-11.7/include/channel_descriptor.h" 3
# 1 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 1 3
# 147 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 148 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 2 3

# 1 "/usr/local/cuda-11.7/include/builtin_types.h" 1 3
# 150 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 2 3

# 1 "/usr/local/cuda-11.7/include/cuda_device_runtime_api.h" 1 3
# 64 "/usr/local/cuda-11.7/include/cuda_device_runtime_api.h" 3
extern "C" {

// Forward declaration only: the stubs below just pass a pointer to this type.
struct cudaFuncAttributes;

// Inline device-side stubs. Each one ignores every argument and
// unconditionally returns cudaErrorUnknown.

inline __attribute__((device)) cudaError_t
cudaMalloc(void **p, size_t s)
{
  (void)p;
  (void)s;
  return cudaErrorUnknown;
}

inline __attribute__((device)) cudaError_t
cudaFuncGetAttributes(struct cudaFuncAttributes *p, const void *c)
{
  (void)p;
  (void)c;
  return cudaErrorUnknown;
}

inline __attribute__((device)) cudaError_t
cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device)
{
  (void)value;
  (void)attr;
  (void)device;
  return cudaErrorUnknown;
}

inline __attribute__((device)) cudaError_t
cudaGetDevice(int *device)
{
  (void)device;
  return cudaErrorUnknown;
}

inline __attribute__((device)) cudaError_t
cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks,
                                              const void *func,
                                              int blockSize,
                                              size_t dynamicSmemSize)
{
  (void)numBlocks;
  (void)func;
  (void)blockSize;
  (void)dynamicSmemSize;
  return cudaErrorUnknown;
}

inline __attribute__((device)) cudaError_t
cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks,
                                                       const void *func,
                                                       int blockSize,
                                                       size_t dynamicSmemSize,
                                                       unsigned int flags)
{
  (void)numBlocks;
  (void)func;
  (void)blockSize;
  (void)dynamicSmemSize;
  (void)flags;
  return cudaErrorUnknown;
}

}
# 129 "/usr/local/cuda-11.7/include/cuda_device_runtime_api.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 130 "/usr/local/cuda-11.7/include/cuda_device_runtime_api.h" 2 3


// Device-side (__attribute__((device))) declarations of runtime entry points,
// all tagged cudart_builtin and given C linkage. Nothing is defined here.
// Notes grounded in the declarations below:
//  - cudaDeviceSynchronize carries a deprecation attribute whose message names
//    -D__CDPRT_SUPPRESS_SYNC_DEPRECATION_WARNING as the way to silence it.
//  - Several entries come in a plain and a `_ptsz`-suffixed form with identical
//    parameter lists (e.g. cudaMemcpyAsync / cudaMemcpyAsync_ptsz).
//  - Return types are cudaError_t except cudaGetErrorString / cudaGetErrorName
//    (const char*), cudaGetParameterBuffer / cudaGetParameterBufferV2 (void*)
//    and cudaCGGetIntrinsicHandle (unsigned long long).
extern "C"
{
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig);
extern __attribute__((device)) __attribute__((cudart_builtin)) __attribute__((deprecated("Use of ""cudaDeviceSynchronize"" from device code is deprecated and will not be supported in a future release. Disable this warning with -D__CDPRT_SUPPRESS_SYNC_DEPRECATION_WARNING."))) cudaError_t cudaDeviceSynchronize(void);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t __cudaDeviceSynchronizeDeprecationAvoidance(void);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaGetLastError(void);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaPeekAtLastError(void);
extern __attribute__((device)) __attribute__((cudart_builtin)) const char* cudaGetErrorString(cudaError_t error);
extern __attribute__((device)) __attribute__((cudart_builtin)) const char* cudaGetErrorName(cudaError_t error);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaGetDeviceCount(int *count);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaGetDevice(int *device);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaStreamDestroy(cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaStreamWaitEvent_ptsz(cudaStream_t stream, cudaEvent_t event, unsigned int flags);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaEventRecord_ptsz(cudaEvent_t event, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream, unsigned int flags);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaEventRecordWithFlags_ptsz(cudaEvent_t event, cudaStream_t stream, unsigned int flags);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaEventDestroy(cudaEvent_t event);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaFree(void *devPtr);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMalloc(void **devPtr, size_t size);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemcpyAsync_ptsz(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemcpy2DAsync_ptsz(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemcpy3DAsync(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemcpy3DAsync_ptsz(const struct cudaMemcpy3DParms *p, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemsetAsync(void *devPtr, int value, size_t count, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemsetAsync_ptsz(void *devPtr, int value, size_t count, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemset2DAsync_ptsz(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaMemset3DAsync_ptsz(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaRuntimeGetVersion(int *runtimeVersion);
# 196 "/usr/local/cuda-11.7/include/cuda_device_runtime_api.h" 3
extern __attribute__((device)) __attribute__((cudart_builtin)) void * cudaGetParameterBuffer(size_t alignment, size_t size);
# 224 "/usr/local/cuda-11.7/include/cuda_device_runtime_api.h" 3
extern __attribute__((device)) __attribute__((cudart_builtin)) void * cudaGetParameterBufferV2(void *func, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaLaunchDevice_ptsz(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaLaunchDeviceV2_ptsz(void *parameterBuffer, cudaStream_t stream);
# 244 "/usr/local/cuda-11.7/include/cuda_device_runtime_api.h" 3
    extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaLaunchDevice(void *func, void *parameterBuffer, dim3 gridDimension, dim3 blockDimension, unsigned int sharedMemSize, cudaStream_t stream);
    extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaLaunchDeviceV2(void *parameterBuffer, cudaStream_t stream);


extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSmemSize, unsigned int flags);

extern __attribute__((device)) __attribute__((cudart_builtin)) unsigned long long cudaCGGetIntrinsicHandle(enum cudaCGScope scope);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaCGSynchronize(unsigned long long handle, unsigned int flags);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaCGSynchronizeGrid(unsigned long long handle, unsigned int flags);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaCGGetSize(unsigned int *numThreads, unsigned int *numGrids, unsigned long long handle);
extern __attribute__((device)) __attribute__((cudart_builtin)) cudaError_t cudaCGGetRank(unsigned int *threadRank, unsigned int *gridRank, unsigned long long handle);
}

// Typed device-side overload declarations (C++ linkage). Only the signatures
// are declared here; no definitions appear at this point in the file.

// Typed-pointer form of cudaMalloc.
template <class T>
static __inline__ __attribute__((device)) __attribute__((cudart_builtin))
cudaError_t cudaMalloc(T **devPtr, size_t size);

// Form of cudaFuncGetAttributes taking a typed entry pointer.
template <class T>
static __inline__ __attribute__((device)) __attribute__((cudart_builtin))
cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *attr, T *entry);

// Form of the occupancy query taking the function as a value of type T.
template <class T>
static __inline__ __attribute__((device)) __attribute__((cudart_builtin))
cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks,
                                                          T func,
                                                          int blockSize,
                                                          size_t dynamicSmemSize);

// Same as above with an additional unsigned flags argument.
template <class T>
static __inline__ __attribute__((device)) __attribute__((cudart_builtin))
cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks,
                                                                   T func,
                                                                   int blockSize,
                                                                   size_t dynamicSmemSize,
                                                                   unsigned int flags);
# 152 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 2 3
# 269 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern "C" {
// Host-side device-management declarations (cudaDevice*), all with C linkage
// and returning cudaError_t. Some are additionally tagged cudart_builtin.
// Declarations only: no bodies are visible in this file section.
# 309 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceReset(void);
# 331 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceSynchronize(void);
# 418 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceSetLimit(enum cudaLimit limit, size_t value);
# 453 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit);
# 476 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceGetTexture1DLinearMaxWidth(size_t *maxWidthInElements, const struct cudaChannelFormatDesc *fmtDesc, int device);
# 510 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig);
# 547 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority);
# 591 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig);
# 622 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig);
# 666 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config);
# 693 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceGetByPCIBusId(int *device, const char *pciBusId);
# 723 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceGetPCIBusId(char *pciBusId, int len, int device);
// Host-only declarations for the cudaIpc* handle functions (event and memory
// handles), followed by cudaDeviceFlushGPUDirectRDMAWrites. All return
// cudaError_t; none is tagged cudart_builtin.
# 771 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event);
# 812 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle);
# 855 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr);
# 919 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaIpcOpenMemHandle(void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags);
# 955 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaIpcCloseMemHandle(void *devPtr);
# 987 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceFlushGPUDirectRDMAWrites(enum cudaFlushGPUDirectRDMAWritesTarget target, enum cudaFlushGPUDirectRDMAWritesScope scope);
// Host-only cudaThread* declarations. Every one of the six carries
// __attribute__((deprecated)), so any use produces a deprecation diagnostic.
// NOTE(review): these are presumably superseded by the cudaDevice* family
// (e.g. cudaThreadSetLimit has the same parameter list as cudaDeviceSetLimit)
// — confirm the exact replacements in the CUDA runtime documentation.
# 1031 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaThreadExit(void);
# 1057 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaThreadSynchronize(void);
# 1106 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaThreadSetLimit(enum cudaLimit limit, size_t value);
# 1139 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit);
# 1175 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig);
# 1222 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig);
// Host-side error-reporting declarations, all tagged cudart_builtin.
// cudaGetLastError / cudaPeekAtLastError take no arguments and return a
// cudaError_t; cudaGetErrorName / cudaGetErrorString map a cudaError_t to a
// const char*.
# 1285 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaGetLastError(void);
# 1333 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaPeekAtLastError(void);
# 1349 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) const char* cudaGetErrorName(cudaError_t error);
# 1365 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) const char* cudaGetErrorString(cudaError_t error);
// Host-side declarations for device enumeration, property/attribute queries,
// per-device memory-pool get/set, and device selection and flags. All return
// cudaError_t and report their results through pointer out-parameters.
# 1393 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaGetDeviceCount(int *count);
# 1666 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device);
# 1868 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device);
# 1886 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceGetDefaultMemPool(cudaMemPool_t *memPool, int device);
# 1910 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceSetMemPool(int device, cudaMemPool_t memPool);
# 1930 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceGetMemPool(cudaMemPool_t *memPool, int device);
# 1978 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceGetNvSciSyncAttributes(void *nvSciSyncAttrList, int device, int flags);
# 2018 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, int srcDevice, int dstDevice);
# 2039 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaChooseDevice(int *device, const struct cudaDeviceProp *prop);
# 2083 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaSetDevice(int device);
# 2104 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaGetDevice(int *device);
# 2135 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaSetValidDevices(int *device_arr, int len);
# 2200 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaSetDeviceFlags( unsigned int flags );
# 2244 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGetDeviceFlags( unsigned int *flags );
// Host-side stream declarations: creation/destruction, priority/flag/attribute
// queries, event waits, callbacks, synchronization, managed-memory attachment
// and stream capture. cudaCtxResetPersistingL2Cache is declared among them.
// Default arguments visible in these declarations:
//   cudaStreamWaitEvent                 flags = 0
//   cudaStreamAttachMemAsync            length = 0, flags = 0x04
//   cudaStreamGetCaptureInfo_v2         id_out, graph_out, dependencies_out,
//                                       numDependencies_out all = 0
//   cudaStreamUpdateCaptureDependencies flags = 0
# 2284 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaStreamCreate(cudaStream_t *pStream);
# 2316 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags);
# 2362 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags, int priority);
# 2389 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaStreamGetPriority(cudaStream_t hStream, int *priority);
# 2414 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags);
# 2429 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaCtxResetPersistingL2Cache(void);
# 2449 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src);
# 2470 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaStreamGetAttribute(
        cudaStream_t hStream, cudaStreamAttrID attr,
        cudaStreamAttrValue *value_out);
# 2494 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaStreamSetAttribute(
        cudaStream_t hStream, cudaStreamAttrID attr,
        const cudaStreamAttrValue *value);
# 2528 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaStreamDestroy(cudaStream_t stream);
# 2559 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags = 0);

// Callback signature accepted by cudaStreamAddCallback: receives the stream, a cudaError_t status and the caller's userData pointer.
typedef void ( *cudaStreamCallback_t)(cudaStream_t stream, cudaError_t status, void *userData);
# 2634 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaStreamAddCallback(cudaStream_t stream,
        cudaStreamCallback_t callback, void *userData, unsigned int flags);
# 2658 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaStreamSynchronize(cudaStream_t stream);
# 2683 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaStreamQuery(cudaStream_t stream);
# 2767 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr, size_t length = 0, unsigned int flags = 0x04);
# 2806 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode);
# 2857 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode);
# 2885 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph);
# 2923 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaStreamIsCapturing(cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus);
# 2955 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaStreamGetCaptureInfo(cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus, unsigned long long *pId);
# 3010 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaStreamGetCaptureInfo_v2(cudaStream_t stream, enum cudaStreamCaptureStatus *captureStatus_out, unsigned long long *id_out = 0, cudaGraph_t *graph_out = 0, const cudaGraphNode_t **dependencies_out = 0, size_t *numDependencies_out = 0);
# 3043 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaStreamUpdateCaptureDependencies(cudaStream_t stream, cudaGraphNode_t *dependencies, size_t numDependencies, unsigned int flags = 0);
// Host-side event declarations: create, record, query, synchronize, destroy
// and elapsed-time measurement. All return cudaError_t.
// Default arguments visible here: cudaEventRecord takes stream = 0;
// cudaEventRecordWithFlags takes stream = 0 and flags = 0.
// cudaEventElapsedTime reports its result through the float out-parameter `ms`.
# 3080 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaEventCreate(cudaEvent_t *event);
# 3117 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags);
# 3157 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaEventRecord(cudaEvent_t event, cudaStream_t stream = 0);
# 3204 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream = 0, unsigned int flags = 0);
# 3236 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaEventQuery(cudaEvent_t event);
# 3266 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaEventSynchronize(cudaEvent_t event);
# 3295 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaEventDestroy(cudaEvent_t event);
# 3338 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaEventElapsedTime(float *ms, cudaEvent_t start, cudaEvent_t end);
// Host-only declarations for external memory and external semaphore interop:
// import, map (buffer / mipmapped array), signal, wait and destroy.
// The two *_v2 semaphore calls take arrays of semaphores and parameters plus a
// count, and default their stream argument to 0.
# 3518 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaImportExternalMemory(cudaExternalMemory_t *extMem_out, const struct cudaExternalMemoryHandleDesc *memHandleDesc);
# 3573 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaExternalMemoryGetMappedBuffer(void **devPtr, cudaExternalMemory_t extMem, const struct cudaExternalMemoryBufferDesc *bufferDesc);
# 3635 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaExternalMemoryGetMappedMipmappedArray(cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem, const struct cudaExternalMemoryMipmappedArrayDesc *mipmapDesc);
# 3659 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDestroyExternalMemory(cudaExternalMemory_t extMem);
# 3812 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaImportExternalSemaphore(cudaExternalSemaphore_t *extSem_out, const struct cudaExternalSemaphoreHandleDesc *semHandleDesc);
# 3879 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaSignalExternalSemaphoresAsync_v2(const cudaExternalSemaphore_t *extSemArray, const struct cudaExternalSemaphoreSignalParams *paramsArray, unsigned int numExtSems, cudaStream_t stream = 0);
# 3955 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaWaitExternalSemaphoresAsync_v2(const cudaExternalSemaphore_t *extSemArray, const struct cudaExternalSemaphoreWaitParams *paramsArray, unsigned int numExtSems, cudaStream_t stream = 0);
# 3978 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem);
// Host-side declarations for kernel launch, per-function configuration,
// host-function enqueue and occupancy queries. All return cudaError_t.
//  - cudaLaunchKernel and cudaLaunchCooperativeKernel share one parameter list:
//    function pointer, grid and block dim3, argument array, shared-memory
//    size, stream.
//  - cudaLaunchCooperativeKernelMultiDevice, cudaSetDoubleForDevice and
//    cudaSetDoubleForHost are marked __attribute__((deprecated)).
//  - cudaLaunchCooperativeKernelMultiDevice defaults flags to 0.
# 4045 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream);
# 4106 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaLaunchCooperativeKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args, size_t sharedMem, cudaStream_t stream);
# 4207 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaLaunchCooperativeKernelMultiDevice(struct cudaLaunchParams *launchParamsList, unsigned int numDevices, unsigned int flags = 0);
# 4254 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig);
# 4309 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config);
# 4342 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func);
# 4379 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value);
# 4405 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaSetDoubleForDevice(double *d);
# 4429 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaSetDoubleForHost(double *d);
# 4497 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaLaunchHostFunc(cudaStream_t stream, cudaHostFn_t fn, void *userData);
# 4554 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func, int blockSize, size_t dynamicSMemSize);
# 4583 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize, const void *func, int numBlocks, int blockSize);
# 4628 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks, const void *func, int blockSize, size_t dynamicSMemSize, unsigned int flags);
// Allocation, release and host-memory registration prototypes. Every function
// returns cudaError_t, which callers are expected to check.
//  - cudaMallocManaged / cudaMalloc / cudaFree additionally carry cudart_builtin.
//  - cudaMallocHost / cudaHostAlloc allocate pinned host memory (used for async
//    copies); cudaMallocPitch reports the row pitch through `pitch`, which
//    callers must use when indexing rows.
//  - Default arguments visible here: cudaMallocManaged flags = 0x01
//    (presumably cudaMemAttachGlobal -- confirm), cudaMallocArray height = 0 and
//    flags = 0, cudaMalloc3DArray flags = 0, cudaMallocMipmappedArray flags = 0.
# 4749 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaMallocManaged(void **devPtr, size_t size, unsigned int flags = 0x01);
# 4782 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaMalloc(void **devPtr, size_t size);
# 4815 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMallocHost(void **ptr, size_t size);
# 4858 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMallocPitch(void **devPtr, size_t *pitch, size_t width, size_t height);
# 4912 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMallocArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width, size_t height = 0, unsigned int flags = 0);
# 4950 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaFree(void *devPtr);
# 4973 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaFreeHost(void *ptr);
# 4996 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaFreeArray(cudaArray_t array);
# 5019 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray);
# 5085 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaHostAlloc(void **pHost, size_t size, unsigned int flags);
# 5178 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaHostRegister(void *ptr, size_t size, unsigned int flags);
# 5201 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaHostUnregister(void *ptr);
# 5246 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags);
# 5268 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaHostGetFlags(unsigned int *pFlags, void *pHost);
# 5307 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMalloc3D(struct cudaPitchedPtr* pitchedDevPtr, struct cudaExtent extent);
# 5454 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc* desc, struct cudaExtent extent, unsigned int flags = 0);
# 5601 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMallocMipmappedArray(cudaMipmappedArray_t *mipmappedArray, const struct cudaChannelFormatDesc* desc, struct cudaExtent extent, unsigned int numLevels, unsigned int flags = 0);
# 5634 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGetMipmappedArrayLevel(cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, unsigned int level);
// 3D copy entry points plus memory/array introspection prototypes; all return
// cudaError_t.
//  - The 3D copies take their whole description through a single const
//    parameter struct (cudaMemcpy3DParms / cudaMemcpy3DPeerParms).
//  - The *Async variants default `stream` to 0; only cudaMemcpy3DAsync is also
//    marked cudart_builtin.
//  - The remaining queries write their results through non-const pointer
//    parameters (e.g. `free`/`total`, `desc`/`extent`/`flags`).
//  - The two *GetSparseProperties lines start with one space in the generated
//    output; that whitespace is preserved as-is.
# 5739 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpy3D(const struct cudaMemcpy3DParms *p);
# 5770 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpy3DPeer(const struct cudaMemcpy3DPeerParms *p);
# 5888 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaMemcpy3DAsync(const struct cudaMemcpy3DParms *p, cudaStream_t stream = 0);
# 5914 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpy3DPeerAsync(const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream = 0);
# 5948 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemGetInfo(size_t *free, size_t *total);
# 5974 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent, unsigned int *flags, cudaArray_t array);
# 6003 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaArrayGetPlane(cudaArray_t *pPlaneArray, cudaArray_t hArray, unsigned int planeIdx);
# 6027 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaArrayGetMemoryRequirements(struct cudaArrayMemoryRequirements *memoryRequirements, cudaArray_t array, int device);
# 6051 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMipmappedArrayGetMemoryRequirements(struct cudaArrayMemoryRequirements *memoryRequirements, cudaMipmappedArray_t mipmap, int device);
# 6080 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaArrayGetSparseProperties(struct cudaArraySparseProperties *sparseProperties, cudaArray_t array);
# 6110 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaMipmappedArrayGetSparseProperties(struct cudaArraySparseProperties *sparseProperties, cudaMipmappedArray_t mipmap);
// Linear, 2D, array and symbol copy prototypes; all return cudaError_t.
//  - `count` / `width` are size_t byte counts supplied by the caller (pass
//    n * sizeof(T), not sizeof(pointer)).
//  - Every *Async variant defaults `stream` to 0, i.e. the default stream;
//    explicit streams are needed to overlap copies with kernels.
//  - Default `kind` values differ per function: cudaMemcpy2DArrayToArray uses
//    cudaMemcpyDeviceToDevice, cudaMemcpyToSymbol uses cudaMemcpyHostToDevice,
//    cudaMemcpyFromSymbol uses cudaMemcpyDeviceToHost.
//  - Unlike their synchronous counterparts, cudaMemcpyToSymbolAsync and
//    cudaMemcpyFromSymbolAsync have no defaults for `offset` and `kind`.
//  - Only cudaMemcpyAsync and cudaMemcpy2DAsync are marked cudart_builtin.
# 6155 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpy(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind);
# 6190 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpyPeer(void *dst, int dstDevice, const void *src, int srcDevice, size_t count);
# 6239 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpy2D(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind);
# 6289 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpy2DToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind);
# 6339 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpy2DFromArray(void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind);
# 6386 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpy2DArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, enum cudaMemcpyKind kind = cudaMemcpyDeviceToDevice);
# 6429 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpyToSymbol(const void *symbol, const void *src, size_t count, size_t offset = 0, enum cudaMemcpyKind kind = cudaMemcpyHostToDevice);
# 6472 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpyFromSymbol(void *dst, const void *symbol, size_t count, size_t offset = 0, enum cudaMemcpyKind kind = cudaMemcpyDeviceToHost);
# 6529 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaMemcpyAsync(void *dst, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream = 0);
# 6564 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, size_t count, cudaStream_t stream = 0);
# 6627 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaMemcpy2DAsync(void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream = 0);
# 6685 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpy2DToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream = 0);
# 6742 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpy2DFromArrayAsync(void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind, cudaStream_t stream = 0);
# 6793 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpyToSymbolAsync(const void *symbol, const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream = 0);
# 6844 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemcpyFromSymbolAsync(void *dst, const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind, cudaStream_t stream = 0);
// Memset family and symbol lookup prototypes; all return cudaError_t.
//  - `value` is declared as int; the fill is byte-wise, so it is suitable for
//    zeroing but not for filling floats with a non-zero value.
//  - The three *Async variants are marked cudart_builtin and default `stream`
//    to 0; the synchronous variants have no stream parameter.
//  - cudaGetSymbolAddress / cudaGetSymbolSize take the symbol as an untyped
//    `const void *` and write their result through the first pointer parameter.
# 6873 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemset(void *devPtr, int value, size_t count);
# 6907 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemset2D(void *devPtr, size_t pitch, int value, size_t width, size_t height);
# 6953 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemset3D(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent);
# 6989 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaMemsetAsync(void *devPtr, int value, size_t count, cudaStream_t stream = 0);
# 7030 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream = 0);
# 7083 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent, cudaStream_t stream = 0);
# 7111 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGetSymbolAddress(void **devPtr, const void *symbol);
# 7138 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGetSymbolSize(size_t *size, const void *symbol);
// Managed-memory hints, memory-range attribute queries, and the legacy
// array-copy entry points; all return cudaError_t.
//  - cudaMemPrefetchAsync / cudaMemAdvise are the tuning knobs for memory
//    obtained from cudaMallocManaged; the prefetch defaults `stream` to 0.
//  - cudaMemRangeGetAttributes is the batched form: it takes parallel arrays
//    (`data`, `dataSizes`, `attributes`) plus `numAttributes`.
//  - The five cudaMemcpy*Array* functions below carry
//    __attribute__((deprecated)), so any use triggers a compiler deprecation
//    warning. NOTE(review): the cudaMemcpy2D*Array variants are presumably the
//    replacements -- confirm in the CUDA Runtime API reference.
# 7208 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, cudaStream_t stream = 0);
# 7324 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, int device);
# 7383 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemRangeGetAttribute(void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, const void *devPtr, size_t count);
# 7422 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemRangeGetAttributes(void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, size_t numAttributes, const void *devPtr, size_t count);
# 7482 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind);
# 7524 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, enum cudaMemcpyKind kind);
# 7567 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaMemcpyArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, enum cudaMemcpyKind kind = cudaMemcpyDeviceToDevice);
# 7618 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaMemcpyToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream = 0);
# 7668 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, enum cudaMemcpyKind kind, cudaStream_t stream = 0);
// Stream-ordered allocator and memory-pool prototypes; all return cudaError_t.
//  - cudaMallocAsync / cudaFreeAsync / cudaMallocFromPoolAsync take the stream
//    as a required parameter (no `= 0` default, unlike the memcpy/memset
//    *Async functions).
//  - Pool attributes are passed through an untyped `void *value`; the expected
//    pointee type depends on `attr`.
//    NOTE(review): confirm per-attribute types in the API reference.
//  - The Export*/Import* pairs exchange pools and pointers through an opaque
//    `void *shareableHandle` / cudaMemPoolPtrExportData.
# 7737 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMallocAsync(void **devPtr, size_t size, cudaStream_t hStream);
# 7763 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaFreeAsync(void *devPtr, cudaStream_t hStream);
# 7788 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPoolTrimTo(cudaMemPool_t memPool, size_t minBytesToKeep);
# 7832 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPoolSetAttribute(cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value );
# 7880 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPoolGetAttribute(cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value );
# 7895 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPoolSetAccess(cudaMemPool_t memPool, const struct cudaMemAccessDesc *descList, size_t count);
# 7908 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPoolGetAccess(enum cudaMemAccessFlags *flags, cudaMemPool_t memPool, struct cudaMemLocation *location);
# 7928 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPoolCreate(cudaMemPool_t *memPool, const struct cudaMemPoolProps *poolProps);
# 7950 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPoolDestroy(cudaMemPool_t memPool);
# 7986 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMallocFromPoolAsync(void **ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream);
# 8011 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPoolExportToShareableHandle(
    void *shareableHandle,
    cudaMemPool_t memPool,
    enum cudaMemAllocationHandleType handleType,
    unsigned int flags);
# 8038 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPoolImportFromShareableHandle(
    cudaMemPool_t *memPool,
    void *shareableHandle,
    enum cudaMemAllocationHandleType handleType,
    unsigned int flags);
# 8061 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPoolExportPointer(struct cudaMemPoolPtrExportData *exportData, void *ptr);
# 8090 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaMemPoolImportPointer(void **ptr, cudaMemPool_t memPool, struct cudaMemPoolPtrExportData *exportData);
// Pointer introspection, peer-to-peer access control, and graphics-interop
// resource prototypes; all return cudaError_t.
//  - cudaDeviceCanAccessPeer reports through `int *canAccessPeer`;
//    cudaDeviceEnablePeerAccess takes a `flags` word with no default.
//  - cudaGraphicsMapResources / cudaGraphicsUnmapResources operate on an array
//    of `count` resources and default `stream` to 0.
//  - The cudaGraphics*GetMapped* functions return the mapped pointer/array
//    through their first pointer parameter.
# 8242 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaPointerGetAttributes(struct cudaPointerAttributes *attributes, const void *ptr);
# 8283 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice);
# 8325 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags);
# 8347 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceDisablePeerAccess(int peerDevice);
# 8411 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource);
# 8446 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphicsResourceSetMapFlags(cudaGraphicsResource_t resource, unsigned int flags);
# 8485 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphicsMapResources(int count, cudaGraphicsResource_t *resources, cudaStream_t stream = 0);
# 8520 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphicsUnmapResources(int count, cudaGraphicsResource_t *resources, cudaStream_t stream = 0);
# 8552 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphicsResourceGetMappedPointer(void **devPtr, size_t *size, cudaGraphicsResource_t resource);
# 8590 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphicsSubResourceGetMappedArray(cudaArray_t *array, cudaGraphicsResource_t resource, unsigned int arrayIndex, unsigned int mipLevel);
# 8619 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphicsResourceGetMappedMipmappedArray(cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource);
// Texture and surface prototypes, in two generations:
//  - Reference-based API (functions taking `textureReference` /
//    `surfaceReference`): every one carries __attribute__((deprecated)).
//    cudaBindTexture's default `size` is the literal (2147483647 *2U +1U),
//    which evaluates to 4294967295 -- presumably the expansion of UINT_MAX.
//  - Object-based API (cudaTextureObject_t / cudaSurfaceObject_t): not marked
//    deprecated; objects are created from descriptor structs and destroyed
//    explicitly.
// All return cudaError_t except cudaCreateChannelDesc, which returns a
// `struct cudaChannelFormatDesc` by value and so has no error channel in its
// signature.
# 8690 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaBindTexture(size_t *offset, const struct textureReference *texref, const void *devPtr, const struct cudaChannelFormatDesc *desc, size_t size = (2147483647 *2U +1U));
# 8749 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaBindTexture2D(size_t *offset, const struct textureReference *texref, const void *devPtr, const struct cudaChannelFormatDesc *desc, size_t width, size_t height, size_t pitch);
# 8787 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaBindTextureToArray(const struct textureReference *texref, cudaArray_const_t array, const struct cudaChannelFormatDesc *desc);
# 8827 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaBindTextureToMipmappedArray(const struct textureReference *texref, cudaMipmappedArray_const_t mipmappedArray, const struct cudaChannelFormatDesc *desc);
# 8853 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaUnbindTexture(const struct textureReference *texref);
# 8882 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaGetTextureAlignmentOffset(size_t *offset, const struct textureReference *texref);
# 8912 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaGetTextureReference(const struct textureReference **texref, const void *symbol);
# 8957 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaBindSurfaceToArray(const struct surfaceReference *surfref, cudaArray_const_t array, const struct cudaChannelFormatDesc *desc);
# 8982 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((deprecated)) __attribute__((host)) cudaError_t cudaGetSurfaceReference(const struct surfaceReference **surfref, const void *symbol);
# 9017 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGetChannelDesc(struct cudaChannelFormatDesc *desc, cudaArray_const_t array);
# 9047 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) struct cudaChannelFormatDesc cudaCreateChannelDesc(int x, int y, int z, int w, enum cudaChannelFormatKind f);
# 9271 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaCreateTextureObject(cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, const struct cudaTextureDesc *pTexDesc, const struct cudaResourceViewDesc *pResViewDesc);
# 9291 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDestroyTextureObject(cudaTextureObject_t texObject);
# 9311 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGetTextureObjectResourceDesc(struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject);
# 9331 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGetTextureObjectTextureDesc(struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject);
# 9352 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGetTextureObjectResourceViewDesc(struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject);
# 9397 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaCreateSurfaceObject(cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc);
# 9417 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject);
# 9436 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGetSurfaceObjectResourceDesc(struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject);
// Version queries: each writes an int through its pointer parameter and returns
// cudaError_t. Only cudaRuntimeGetVersion is marked cudart_builtin.
// NOTE(review): the integer encoding of the version is not visible here --
// confirm in the CUDA Runtime API reference before parsing it.
# 9470 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDriverGetVersion(int *driverVersion);
# 9495 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) __attribute__((cudart_builtin)) cudaError_t cudaRuntimeGetVersion(int *runtimeVersion);
// Graph creation and kernel-node prototypes; all return cudaError_t.
// Signature shape shared by the cudaGraphAdd*Node functions in this header:
// (cudaGraphNode_t *pGraphNode, cudaGraph_t graph,
//  const cudaGraphNode_t *pDependencies, size_t numDependencies, <payload>).
// NOTE(review): presumably `pGraphNode` receives the new node and
// `pDependencies` is an array of `numDependencies` predecessor nodes -- confirm
// against the Graph Management section of the API reference.
// The Get/Set pairs below read or replace a node's parameters or attributes.
# 9542 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphCreate(cudaGraph_t *pGraph, unsigned int flags);
# 9639 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphAddKernelNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaKernelNodeParams *pNodeParams);
# 9672 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphKernelNodeGetParams(cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams);
# 9697 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphKernelNodeSetParams(cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams);
# 9717 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphKernelNodeCopyAttributes(
        cudaGraphNode_t hSrc,
        cudaGraphNode_t hDst);
# 9740 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphKernelNodeGetAttribute(
    cudaGraphNode_t hNode,
    cudaKernelNodeAttrID attr,
    cudaKernelNodeAttrValue *value_out);
# 9764 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphKernelNodeSetAttribute(
    cudaGraphNode_t hNode,
    cudaKernelNodeAttrID attr,
    const cudaKernelNodeAttrValue *value);
// Graph memcpy-node prototypes; all return cudaError_t.
// Three ways to describe the copy, each with an Add form and a SetParams form:
//  - generic: a full `cudaMemcpy3DParms` struct;
//  - ToSymbol / FromSymbol: (symbol, src|dst, count, offset, kind);
//  - 1D: (dst, src, count, kind).
// Unlike cudaMemcpyToSymbol / cudaMemcpyFromSymbol, the symbol variants here
// declare no default for `offset` or `kind`.
// Several declarations start with one space in the generated output; that
// whitespace is preserved as-is.
# 9814 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphAddMemcpyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaMemcpy3DParms *pCopyParams);
# 9873 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphAddMemcpyNodeToSymbol(
    cudaGraphNode_t *pGraphNode,
    cudaGraph_t graph,
    const cudaGraphNode_t *pDependencies,
    size_t numDependencies,
    const void* symbol,
    const void* src,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind);
# 9942 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphAddMemcpyNodeFromSymbol(
    cudaGraphNode_t* pGraphNode,
    cudaGraph_t graph,
    const cudaGraphNode_t* pDependencies,
    size_t numDependencies,
    void* dst,
    const void* symbol,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind);
# 10010 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphAddMemcpyNode1D(
    cudaGraphNode_t *pGraphNode,
    cudaGraph_t graph,
    const cudaGraphNode_t *pDependencies,
    size_t numDependencies,
    void* dst,
    const void* src,
    size_t count,
    enum cudaMemcpyKind kind);
# 10042 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphMemcpyNodeGetParams(cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams);
# 10068 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphMemcpyNodeSetParams(cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams);
# 10107 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphMemcpyNodeSetParamsToSymbol(
    cudaGraphNode_t node,
    const void* symbol,
    const void* src,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind);
# 10153 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphMemcpyNodeSetParamsFromSymbol(
    cudaGraphNode_t node,
    void* dst,
    const void* symbol,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind);
# 10199 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphMemcpyNodeSetParams1D(
    cudaGraphNode_t node,
    void* dst,
    const void* src,
    size_t count,
    enum cudaMemcpyKind kind);
// Graph memset-node, host-node and child-graph-node prototypes; all return
// cudaError_t.
//  - Memset and host nodes each have Add / GetParams / SetParams, using
//    cudaMemsetParams and cudaHostNodeParams respectively (Set takes the struct
//    as const, Get as a writable pointer).
//  - Child-graph nodes take the embedded graph by handle (`childGraph`) and
//    expose only a getter, which writes through `cudaGraph_t *pGraph`.
# 10246 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphAddMemsetNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaMemsetParams *pMemsetParams);
# 10269 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphMemsetNodeGetParams(cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams);
# 10292 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphMemsetNodeSetParams(cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams);
# 10333 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphAddHostNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaHostNodeParams *pNodeParams);
# 10356 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphHostNodeGetParams(cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams);
# 10379 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphHostNodeSetParams(cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams);
# 10419 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaGraph_t childGraph);
# 10446 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph);
// Graph empty-node and event-node prototypes; all return cudaError_t.
//  - cudaGraphAddEmptyNode takes only the common (node, graph, dependencies)
//    arguments and no payload.
//  - Event-record and event-wait nodes each have Add / GetEvent / SetEvent; the
//    event is passed by handle, and the getter writes through
//    `cudaEvent_t *event_out`.
// The event declarations start with one space in the generated output; that
// whitespace is preserved as-is.
# 10483 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphAddEmptyNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies);
# 10526 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaEvent_t event);
# 10553 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out);
# 10580 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event);
# 10626 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, cudaEvent_t event);
# 10653 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out);
# 10680 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event);
// Graph external-semaphore node prototypes; all return cudaError_t.
// Signal and wait nodes mirror each other: Add / GetParams / SetParams, using
// cudaExternalSemaphoreSignalNodeParams and cudaExternalSemaphoreWaitNodeParams.
// Add and SetParams take the struct as const; GetParams writes through
// `params_out`.
# 10729 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphAddExternalSemaphoresSignalNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaExternalSemaphoreSignalNodeParams *nodeParams);
# 10762 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExternalSemaphoresSignalNodeGetParams(cudaGraphNode_t hNode, struct cudaExternalSemaphoreSignalNodeParams *params_out);
# 10789 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExternalSemaphoresSignalNodeSetParams(cudaGraphNode_t hNode, const struct cudaExternalSemaphoreSignalNodeParams *nodeParams);
# 10838 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphAddExternalSemaphoresWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, const struct cudaExternalSemaphoreWaitNodeParams *nodeParams);
# 10871 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExternalSemaphoresWaitNodeGetParams(cudaGraphNode_t hNode, struct cudaExternalSemaphoreWaitNodeParams *params_out);
# 10898 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExternalSemaphoresWaitNodeSetParams(cudaGraphNode_t hNode, const struct cudaExternalSemaphoreWaitNodeParams *nodeParams);
// Graph memory alloc/free node prototypes and per-device graph-memory controls;
// all return cudaError_t.
//  - cudaGraphAddMemAllocNode takes `nodeParams` as a NON-const pointer, unlike
//    the other cudaGraphAdd*Node functions. NOTE(review): presumably the call
//    writes the resulting device pointer back into the struct -- confirm.
//  - cudaGraphMemFreeNodeGetParams declares `dptr_out` as plain `void *`.
//    NOTE(review): callers presumably pass the address of a `void *` variable
//    to receive the freed address -- confirm in the API reference.
//  - The Get/SetGraphMemAttribute pair uses an untyped `void *value`; the
//    pointee type depends on `attr`.
# 10975 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphAddMemAllocNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, struct cudaMemAllocNodeParams *nodeParams);
# 11002 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphMemAllocNodeGetParams(cudaGraphNode_t node, struct cudaMemAllocNodeParams *params_out);
# 11062 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphAddMemFreeNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dptr);
# 11086 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void *dptr_out);
# 11114 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceGraphMemTrim(int device);
# 11151 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceGetGraphMemAttribute(int device, enum cudaGraphMemAttributeType attr, void* value);
# 11185 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaDeviceSetGraphMemAttribute(int device, enum cudaGraphMemAttributeType attr, void* value);
// Graph cloning, topology queries and edge-editing prototypes; all return
// cudaError_t.
//  - The enumeration functions (GetNodes, GetRootNodes, GetEdges,
//    NodeGetDependencies, NodeGetDependentNodes) pair a caller-provided output
//    array with a `size_t *` count. NOTE(review): the count is presumably
//    in/out (capacity in, number written or available out) -- confirm before
//    relying on it.
//  - cudaGraphAddDependencies / cudaGraphRemoveDependencies take parallel
//    `from` / `to` arrays of length `numDependencies`, i.e. one edge per index.
# 11213 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph);
# 11241 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph);
# 11272 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType);
# 11303 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphGetNodes(cudaGraph_t graph, cudaGraphNode_t *nodes, size_t *numNodes);
# 11334 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphGetRootNodes(cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes);
# 11368 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphGetEdges(cudaGraph_t graph, cudaGraphNode_t *from, cudaGraphNode_t *to, size_t *numEdges);
# 11399 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphNodeGetDependencies(cudaGraphNode_t node, cudaGraphNode_t *pDependencies, size_t *pNumDependencies);
# 11431 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphNodeGetDependentNodes(cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, size_t *pNumDependentNodes);
# 11462 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, const cudaGraphNode_t *to, size_t numDependencies);
# 11493 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, const cudaGraphNode_t *to, size_t numDependencies);
# 11523 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphDestroyNode(cudaGraphNode_t node);
# 11561 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphInstantiate(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, cudaGraphNode_t *pErrorNode, char *pLogBuffer, size_t bufferSize);
# 11611 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphInstantiateWithFlags(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, unsigned long long flags);
# 11655 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExecKernelNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams);
# 11705 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExecMemcpyNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams);
# 11760 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphExecMemcpyNodeSetParamsToSymbol(
    cudaGraphExec_t hGraphExec,
    cudaGraphNode_t node,
    const void* symbol,
    const void* src,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind);
# 11823 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphExecMemcpyNodeSetParamsFromSymbol(
    cudaGraphExec_t hGraphExec,
    cudaGraphNode_t node,
    void* dst,
    const void* symbol,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind);
# 11884 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphExecMemcpyNodeSetParams1D(
    cudaGraphExec_t hGraphExec,
    cudaGraphNode_t node,
    void* dst,
    const void* src,
    size_t count,
    enum cudaMemcpyKind kind);
# 11938 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExecMemsetNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams);
# 11977 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams);
# 12023 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphExecChildGraphNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph);
# 12067 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphExecEventRecordNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event);
# 12111 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphExecEventWaitNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event);
# 12158 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExecExternalSemaphoresSignalNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const struct cudaExternalSemaphoreSignalNodeParams *nodeParams);
# 12205 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExecExternalSemaphoresWaitNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const struct cudaExternalSemaphoreWaitNodeParams *nodeParams);
# 12284 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphNodeSetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled);
# 12351 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int *isEnabled);
# 12510 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, cudaGraphNode_t *hErrorNode_out, enum cudaGraphExecUpdateResult *updateResult_out);
# 12535 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
 extern __attribute__((host)) cudaError_t cudaGraphUpload(cudaGraphExec_t graphExec, cudaStream_t stream);
# 12566 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream);
# 12589 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphExecDestroy(cudaGraphExec_t graphExec);
# 12610 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphDestroy(cudaGraph_t graph);
# 12629 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphDebugDotPrint(cudaGraph_t graph, const char *path, unsigned int flags);
# 12665 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaUserObjectCreate(cudaUserObject_t *object_out, void *ptr, cudaHostFn_t destroy, unsigned int initialRefcount, unsigned int flags);
# 12689 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaUserObjectRetain(cudaUserObject_t object, unsigned int count = 1);
# 12717 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaUserObjectRelease(cudaUserObject_t object, unsigned int count = 1);
# 12745 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphRetainUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count = 1, unsigned int flags = 0);
# 12770 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGraphReleaseUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count = 1);
# 12836 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGetDriverEntryPoint(const char *symbol, void **funcPtr, unsigned long long flags);


extern __attribute__((host)) cudaError_t cudaGetExportTable(const void **ppExportTable, const cudaUUID_t *pExportTableId);
# 13017 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
extern __attribute__((host)) cudaError_t cudaGetFuncBySymbol(cudaFunction_t* functionPtr, const void* symbolPtr);
# 13175 "/usr/local/cuda-11.7/include/cuda_runtime_api.h" 3
}
# 62 "/usr/local/cuda-11.7/include/channel_descriptor.h" 2 3
# 124 "/usr/local/cuda-11.7/include/channel_descriptor.h" 3
// Generic fallback for any type without a dedicated specialization:
// widths (0, 0, 0, 0), kind cudaChannelFormatKindNone.
template<class T> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc(void)
{
  const cudaChannelFormatKind kind = cudaChannelFormatKindNone;
  return cudaCreateChannelDesc(0, 0, 0, 0, kind);
}

// Float-kind descriptor: widths (bits in unsigned short, 0, 0, 0).
static __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDescHalf(void)
{
  const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindFloat);
}

// Float-kind descriptor: widths (bits in unsigned short, 0, 0, 0).
static __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDescHalf1(void)
{
  const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindFloat);
}

// Float-kind descriptor: widths (bits, bits, 0, 0), bits = bits in unsigned short.
static __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDescHalf2(void)
{
  const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindFloat);
}

// Float-kind descriptor: all four widths = bits in unsigned short.
static __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDescHalf4(void)
{
  const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindFloat);
}

// Descriptor for `char`: widths (bits in char, 0, 0, 0), kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<char>(void)
{
  const int bits = static_cast<int>(sizeof(char)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}

// Descriptor for `signed char`: widths (bits in signed char, 0, 0, 0), kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<signed char>(void)
{
  const int bits = static_cast<int>(sizeof(signed char)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}

// Descriptor for `unsigned char`: widths (bits in unsigned char, 0, 0, 0), kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<unsigned char>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned char)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}

// Descriptor for `char1`: widths (bits in signed char, 0, 0, 0), kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<char1>(void)
{
  const int bits = static_cast<int>(sizeof(signed char)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}

// Descriptor for `uchar1`: widths (bits in unsigned char, 0, 0, 0), kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<uchar1>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned char)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}

// Descriptor for `char2`: widths (bits, bits, 0, 0), bits = bits in signed char; kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<char2>(void)
{
  const int bits = static_cast<int>(sizeof(signed char)) * 8;
  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindSigned);
}

// Descriptor for `uchar2`: widths (bits, bits, 0, 0), bits = bits in unsigned char; kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<uchar2>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned char)) * 8;
  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindUnsigned);
}

// Descriptor for `char4`: all four widths = bits in signed char; kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<char4>(void)
{
  const int bits = static_cast<int>(sizeof(signed char)) * 8;
  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindSigned);
}

// Descriptor for `uchar4`: all four widths = bits in unsigned char; kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<uchar4>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned char)) * 8;
  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindUnsigned);
}

// Descriptor for `short`: widths (bits in short, 0, 0, 0), kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<short>(void)
{
  const int bits = static_cast<int>(sizeof(short)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}

// Descriptor for `unsigned short`: widths (bits in unsigned short, 0, 0, 0), kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<unsigned short>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}

// Descriptor for `short1`: widths (bits in short, 0, 0, 0), kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<short1>(void)
{
  const int bits = static_cast<int>(sizeof(short)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}

// Descriptor for `ushort1`: widths (bits in unsigned short, 0, 0, 0), kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<ushort1>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}

// Descriptor for `short2`: widths (bits, bits, 0, 0), bits = bits in short; kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<short2>(void)
{
  const int bits = static_cast<int>(sizeof(short)) * 8;
  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindSigned);
}

// Descriptor for `ushort2`: widths (bits, bits, 0, 0), bits = bits in unsigned short; kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<ushort2>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindUnsigned);
}

// Descriptor for `short4`: all four widths = bits in short; kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<short4>(void)
{
  const int bits = static_cast<int>(sizeof(short)) * 8;
  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindSigned);
}

// Descriptor for `ushort4`: all four widths = bits in unsigned short; kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<ushort4>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned short)) * 8;
  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindUnsigned);
}

// Descriptor for `int`: widths (bits in int, 0, 0, 0), kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<int>(void)
{
  const int bits = static_cast<int>(sizeof(int)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}

// Descriptor for `unsigned int`: widths (bits in unsigned int, 0, 0, 0), kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<unsigned int>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned int)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}

// Descriptor for `int1`: widths (bits in int, 0, 0, 0), kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<int1>(void)
{
  const int bits = static_cast<int>(sizeof(int)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindSigned);
}

// Descriptor for `uint1`: widths (bits in unsigned int, 0, 0, 0), kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<uint1>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned int)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindUnsigned);
}

// Descriptor for `int2`: widths (bits, bits, 0, 0), bits = bits in int; kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<int2>(void)
{
  const int bits = static_cast<int>(sizeof(int)) * 8;
  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindSigned);
}

// Descriptor for `uint2`: widths (bits, bits, 0, 0), bits = bits in unsigned int; kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<uint2>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned int)) * 8;
  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindUnsigned);
}

// Descriptor for `int4`: all four widths = bits in int; kind Signed.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<int4>(void)
{
  const int bits = static_cast<int>(sizeof(int)) * 8;
  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindSigned);
}

// Descriptor for `uint4`: all four widths = bits in unsigned int; kind Unsigned.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<uint4>(void)
{
  const int bits = static_cast<int>(sizeof(unsigned int)) * 8;
  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindUnsigned);
}
# 396 "/usr/local/cuda-11.7/include/channel_descriptor.h" 3
// Descriptor for `float`: widths (bits in float, 0, 0, 0), kind Float.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<float>(void)
{
  const int bits = static_cast<int>(sizeof(float)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindFloat);
}

// Descriptor for `float1`: widths (bits in float, 0, 0, 0), kind Float.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<float1>(void)
{
  const int bits = static_cast<int>(sizeof(float)) * 8;
  return cudaCreateChannelDesc(bits, 0, 0, 0, cudaChannelFormatKindFloat);
}

// Descriptor for `float2`: widths (bits, bits, 0, 0), bits = bits in float; kind Float.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<float2>(void)
{
  const int bits = static_cast<int>(sizeof(float)) * 8;
  return cudaCreateChannelDesc(bits, bits, 0, 0, cudaChannelFormatKindFloat);
}

// Descriptor for `float4`: all four widths = bits in float; kind Float.
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<float4>(void)
{
  const int bits = static_cast<int>(sizeof(float)) * 8;
  return cudaCreateChannelDesc(bits, bits, bits, bits, cudaChannelFormatKindFloat);
}

// NV12-kind descriptor: widths (bits, bits, bits, 0), bits = bits in char.
static __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDescNV12(void)
{
    const int bits = static_cast<int>(sizeof(char)) * 8;
    return cudaCreateChannelDesc(bits, bits, bits, 0, cudaChannelFormatKindNV12);
}

// Generic fallback for any format kind without a dedicated specialization:
// widths (0, 0, 0, 0), kind cudaChannelFormatKindNone.
template<cudaChannelFormatKind> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindNone;
    return cudaCreateChannelDesc(0, 0, 0, 0, kind);
}


// Descriptor for cudaChannelFormatKindSignedNormalized8X1: widths (8, 0, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindSignedNormalized8X1>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindSignedNormalized8X1;
    return cudaCreateChannelDesc(8, 0, 0, 0, kind);
}

// Descriptor for cudaChannelFormatKindSignedNormalized8X2: widths (8, 8, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindSignedNormalized8X2>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindSignedNormalized8X2;
    return cudaCreateChannelDesc(8, 8, 0, 0, kind);
}

// Descriptor for cudaChannelFormatKindSignedNormalized8X4: widths (8, 8, 8, 8).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindSignedNormalized8X4>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindSignedNormalized8X4;
    return cudaCreateChannelDesc(8, 8, 8, 8, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedNormalized8X1: widths (8, 0, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedNormalized8X1>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedNormalized8X1;
    return cudaCreateChannelDesc(8, 0, 0, 0, kind);
}

// Descriptor for cudaChannelFormatKindUnsignedNormalized8X2: widths (8, 8, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedNormalized8X2>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedNormalized8X2;
    return cudaCreateChannelDesc(8, 8, 0, 0, kind);
}

// Descriptor for cudaChannelFormatKindUnsignedNormalized8X4: widths (8, 8, 8, 8).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedNormalized8X4>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedNormalized8X4;
    return cudaCreateChannelDesc(8, 8, 8, 8, kind);
}


// Descriptor for cudaChannelFormatKindSignedNormalized16X1: widths (16, 0, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindSignedNormalized16X1>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindSignedNormalized16X1;
    return cudaCreateChannelDesc(16, 0, 0, 0, kind);
}

// Descriptor for cudaChannelFormatKindSignedNormalized16X2: widths (16, 16, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindSignedNormalized16X2>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindSignedNormalized16X2;
    return cudaCreateChannelDesc(16, 16, 0, 0, kind);
}

// Descriptor for cudaChannelFormatKindSignedNormalized16X4: widths (16, 16, 16, 16).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindSignedNormalized16X4>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindSignedNormalized16X4;
    return cudaCreateChannelDesc(16, 16, 16, 16, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedNormalized16X1: widths (16, 0, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedNormalized16X1>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedNormalized16X1;
    return cudaCreateChannelDesc(16, 0, 0, 0, kind);
}

// Descriptor for cudaChannelFormatKindUnsignedNormalized16X2: widths (16, 16, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedNormalized16X2>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedNormalized16X2;
    return cudaCreateChannelDesc(16, 16, 0, 0, kind);
}

// Descriptor for cudaChannelFormatKindUnsignedNormalized16X4: widths (16, 16, 16, 16).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedNormalized16X4>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedNormalized16X4;
    return cudaCreateChannelDesc(16, 16, 16, 16, kind);
}


// Descriptor for cudaChannelFormatKindNV12: widths (8, 8, 8, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindNV12>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindNV12;
    return cudaCreateChannelDesc(8, 8, 8, 0, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedBlockCompressed1: widths (8, 8, 8, 8).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed1>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedBlockCompressed1;
    return cudaCreateChannelDesc(8, 8, 8, 8, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedBlockCompressed1SRGB: widths (8, 8, 8, 8).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed1SRGB>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedBlockCompressed1SRGB;
    return cudaCreateChannelDesc(8, 8, 8, 8, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedBlockCompressed2: widths (8, 8, 8, 8).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed2>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedBlockCompressed2;
    return cudaCreateChannelDesc(8, 8, 8, 8, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedBlockCompressed2SRGB: widths (8, 8, 8, 8).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed2SRGB>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedBlockCompressed2SRGB;
    return cudaCreateChannelDesc(8, 8, 8, 8, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedBlockCompressed3: widths (8, 8, 8, 8).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed3>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedBlockCompressed3;
    return cudaCreateChannelDesc(8, 8, 8, 8, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedBlockCompressed3SRGB: widths (8, 8, 8, 8).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed3SRGB>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedBlockCompressed3SRGB;
    return cudaCreateChannelDesc(8, 8, 8, 8, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedBlockCompressed4: widths (8, 0, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed4>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedBlockCompressed4;
    return cudaCreateChannelDesc(8, 0, 0, 0, kind);
}


// Descriptor for cudaChannelFormatKindSignedBlockCompressed4: widths (8, 0, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindSignedBlockCompressed4>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindSignedBlockCompressed4;
    return cudaCreateChannelDesc(8, 0, 0, 0, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedBlockCompressed5: widths (8, 8, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed5>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedBlockCompressed5;
    return cudaCreateChannelDesc(8, 8, 0, 0, kind);
}


// Descriptor for cudaChannelFormatKindSignedBlockCompressed5: widths (8, 8, 0, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindSignedBlockCompressed5>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindSignedBlockCompressed5;
    return cudaCreateChannelDesc(8, 8, 0, 0, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedBlockCompressed6H: widths (16, 16, 16, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed6H>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedBlockCompressed6H;
    return cudaCreateChannelDesc(16, 16, 16, 0, kind);
}


// Descriptor for cudaChannelFormatKindSignedBlockCompressed6H: widths (16, 16, 16, 0).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindSignedBlockCompressed6H>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindSignedBlockCompressed6H;
    return cudaCreateChannelDesc(16, 16, 16, 0, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedBlockCompressed7: widths (8, 8, 8, 8).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed7>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedBlockCompressed7;
    return cudaCreateChannelDesc(8, 8, 8, 8, kind);
}


// Descriptor for cudaChannelFormatKindUnsignedBlockCompressed7SRGB: widths (8, 8, 8, 8).
template<> __inline__ __attribute__((host)) cudaChannelFormatDesc cudaCreateChannelDesc<cudaChannelFormatKindUnsignedBlockCompressed7SRGB>(void)
{
    const cudaChannelFormatKind kind = cudaChannelFormatKindUnsignedBlockCompressed7SRGB;
    return cudaCreateChannelDesc(8, 8, 8, 8, kind);
}
# 96 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3

# 1 "/usr/local/cuda-11.7/include/driver_functions.h" 1 3
# 53 "/usr/local/cuda-11.7/include/driver_functions.h" 3
# 1 "/usr/local/cuda-11.7/include/builtin_types.h" 1 3
# 54 "/usr/local/cuda-11.7/include/driver_functions.h" 2 3
# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 55 "/usr/local/cuda-11.7/include/driver_functions.h" 2 3
# 79 "/usr/local/cuda-11.7/include/driver_functions.h" 3
static __inline__ __attribute__((host)) struct cudaPitchedPtr make_cudaPitchedPtr(void *d, size_t p, size_t xsz, size_t ysz)
{
  struct cudaPitchedPtr s;

  s.ptr = d;
  s.pitch = p;
  s.xsize = xsz;
  s.ysize = ysz;

  return s;
}
# 106 "/usr/local/cuda-11.7/include/driver_functions.h" 3
static __inline__ __attribute__((host)) struct cudaPos make_cudaPos(size_t x, size_t y, size_t z)
{
  struct cudaPos p;

  p.x = x;
  p.y = y;
  p.z = z;

  return p;
}
# 132 "/usr/local/cuda-11.7/include/driver_functions.h" 3
static __inline__ __attribute__((host)) struct cudaExtent make_cudaExtent(size_t w, size_t h, size_t d)
{
  struct cudaExtent e;

  e.width = w;
  e.height = h;
  e.depth = d;

  return e;
}
# 98 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3


# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 101 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3
# 1 "/usr/local/cuda-11.7/include/vector_functions.h" 1 3
# 73 "/usr/local/cuda-11.7/include/vector_functions.h" 3
static __inline__ __attribute__((host)) __attribute__((device)) char1 make_char1(signed char x);

static __inline__ __attribute__((host)) __attribute__((device)) uchar1 make_uchar1(unsigned char x);

static __inline__ __attribute__((host)) __attribute__((device)) char2 make_char2(signed char x, signed char y);

static __inline__ __attribute__((host)) __attribute__((device)) uchar2 make_uchar2(unsigned char x, unsigned char y);

static __inline__ __attribute__((host)) __attribute__((device)) char3 make_char3(signed char x, signed char y, signed char z);

static __inline__ __attribute__((host)) __attribute__((device)) uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z);

static __inline__ __attribute__((host)) __attribute__((device)) char4 make_char4(signed char x, signed char y, signed char z, signed char w);

static __inline__ __attribute__((host)) __attribute__((device)) uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w);

static __inline__ __attribute__((host)) __attribute__((device)) short1 make_short1(short x);

static __inline__ __attribute__((host)) __attribute__((device)) ushort1 make_ushort1(unsigned short x);

static __inline__ __attribute__((host)) __attribute__((device)) short2 make_short2(short x, short y);

static __inline__ __attribute__((host)) __attribute__((device)) ushort2 make_ushort2(unsigned short x, unsigned short y);

static __inline__ __attribute__((host)) __attribute__((device)) short3 make_short3(short x,short y, short z);

static __inline__ __attribute__((host)) __attribute__((device)) ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z);

static __inline__ __attribute__((host)) __attribute__((device)) short4 make_short4(short x, short y, short z, short w);

static __inline__ __attribute__((host)) __attribute__((device)) ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w);

static __inline__ __attribute__((host)) __attribute__((device)) int1 make_int1(int x);

static __inline__ __attribute__((host)) __attribute__((device)) uint1 make_uint1(unsigned int x);

static __inline__ __attribute__((host)) __attribute__((device)) int2 make_int2(int x, int y);

static __inline__ __attribute__((host)) __attribute__((device)) uint2 make_uint2(unsigned int x, unsigned int y);

static __inline__ __attribute__((host)) __attribute__((device)) int3 make_int3(int x, int y, int z);

static __inline__ __attribute__((host)) __attribute__((device)) uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z);

static __inline__ __attribute__((host)) __attribute__((device)) int4 make_int4(int x, int y, int z, int w);

static __inline__ __attribute__((host)) __attribute__((device)) uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w);

static __inline__ __attribute__((host)) __attribute__((device)) long1 make_long1(long int x);

static __inline__ __attribute__((host)) __attribute__((device)) ulong1 make_ulong1(unsigned long int x);

static __inline__ __attribute__((host)) __attribute__((device)) long2 make_long2(long int x, long int y);

static __inline__ __attribute__((host)) __attribute__((device)) ulong2 make_ulong2(unsigned long int x, unsigned long int y);

static __inline__ __attribute__((host)) __attribute__((device)) long3 make_long3(long int x, long int y, long int z);

static __inline__ __attribute__((host)) __attribute__((device)) ulong3 make_ulong3(unsigned long int x, unsigned long int y, unsigned long int z);

static __inline__ __attribute__((host)) __attribute__((device)) long4 make_long4(long int x, long int y, long int z, long int w);

static __inline__ __attribute__((host)) __attribute__((device)) ulong4 make_ulong4(unsigned long int x, unsigned long int y, unsigned long int z, unsigned long int w);

static __inline__ __attribute__((host)) __attribute__((device)) float1 make_float1(float x);

static __inline__ __attribute__((host)) __attribute__((device)) float2 make_float2(float x, float y);

static __inline__ __attribute__((host)) __attribute__((device)) float3 make_float3(float x, float y, float z);

static __inline__ __attribute__((host)) __attribute__((device)) float4 make_float4(float x, float y, float z, float w);

static __inline__ __attribute__((host)) __attribute__((device)) longlong1 make_longlong1(long long int x);

static __inline__ __attribute__((host)) __attribute__((device)) ulonglong1 make_ulonglong1(unsigned long long int x);

static __inline__ __attribute__((host)) __attribute__((device)) longlong2 make_longlong2(long long int x, long long int y);

static __inline__ __attribute__((host)) __attribute__((device)) ulonglong2 make_ulonglong2(unsigned long long int x, unsigned long long int y);

static __inline__ __attribute__((host)) __attribute__((device)) longlong3 make_longlong3(long long int x, long long int y, long long int z);

static __inline__ __attribute__((host)) __attribute__((device)) ulonglong3 make_ulonglong3(unsigned long long int x, unsigned long long int y, unsigned long long int z);

static __inline__ __attribute__((host)) __attribute__((device)) longlong4 make_longlong4(long long int x, long long int y, long long int z, long long int w);

static __inline__ __attribute__((host)) __attribute__((device)) ulonglong4 make_ulonglong4(unsigned long long int x, unsigned long long int y, unsigned long long int z, unsigned long long int w);

static __inline__ __attribute__((host)) __attribute__((device)) double1 make_double1(double x);

static __inline__ __attribute__((host)) __attribute__((device)) double2 make_double2(double x, double y);

static __inline__ __attribute__((host)) __attribute__((device)) double3 make_double3(double x, double y, double z);

static __inline__ __attribute__((host)) __attribute__((device)) double4 make_double4(double x, double y, double z, double w);


# 1 "/usr/local/cuda-11.7/include/vector_functions.hpp" 1 3
# 73 "/usr/local/cuda-11.7/include/vector_functions.hpp" 3
// Returns a char1 whose x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) char1 make_char1(signed char x)
{
  char1 v;
  v.x = x;
  return v;
}

// Returns a uchar1 whose x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) uchar1 make_uchar1(unsigned char x)
{
  uchar1 v;
  v.x = x;
  return v;
}

// Returns a char2 whose x and y members are set from the arguments.
static __inline__ __attribute__((host)) __attribute__((device)) char2 make_char2(signed char x, signed char y)
{
  char2 v;
  v.x = x;
  v.y = y;
  return v;
}

// Returns a uchar2 whose x and y members are set from the arguments.
static __inline__ __attribute__((host)) __attribute__((device)) uchar2 make_uchar2(unsigned char x, unsigned char y)
{
  uchar2 v;
  v.x = x;
  v.y = y;
  return v;
}

// Returns a char3 whose x, y and z members are set from the arguments.
static __inline__ __attribute__((host)) __attribute__((device)) char3 make_char3(signed char x, signed char y, signed char z)
{
  char3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

// Returns a uchar3 whose x, y and z members are set from the arguments.
static __inline__ __attribute__((host)) __attribute__((device)) uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z)
{
  uchar3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

// Returns a char4 whose x, y, z and w members are set from the arguments.
static __inline__ __attribute__((host)) __attribute__((device)) char4 make_char4(signed char x, signed char y, signed char z, signed char w)
{
  char4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}

// Returns a uchar4 whose x, y, z and w members are set from the arguments.
static __inline__ __attribute__((host)) __attribute__((device)) uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w)
{
  uchar4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}

// Returns a short1 whose x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) short1 make_short1(short x)
{
  short1 v;
  v.x = x;
  return v;
}

// Returns a ushort1 whose x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) ushort1 make_ushort1(unsigned short x)
{
  ushort1 v;
  v.x = x;
  return v;
}

// Returns a short2 whose x and y members are set from the arguments.
static __inline__ __attribute__((host)) __attribute__((device)) short2 make_short2(short x, short y)
{
  short2 v;
  v.x = x;
  v.y = y;
  return v;
}

// Returns a ushort2 whose x and y members are set from the arguments.
static __inline__ __attribute__((host)) __attribute__((device)) ushort2 make_ushort2(unsigned short x, unsigned short y)
{
  ushort2 v;
  v.x = x;
  v.y = y;
  return v;
}

// Returns a short3 whose x, y and z members are set from the arguments.
static __inline__ __attribute__((host)) __attribute__((device)) short3 make_short3(short x,short y, short z)
{
  short3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

// Returns a ushort3 whose x, y and z members are set from the arguments.
static __inline__ __attribute__((host)) __attribute__((device)) ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z)
{
  ushort3 v;
  v.x = x;
  v.y = y;
  v.z = z;
  return v;
}

// Returns a short4 whose x, y, z and w members are set from the arguments.
static __inline__ __attribute__((host)) __attribute__((device)) short4 make_short4(short x, short y, short z, short w)
{
  short4 v;
  v.x = x;
  v.y = y;
  v.z = z;
  v.w = w;
  return v;
}

// Builds a ushort4 whose x, y, z and w members are set from the four arguments.
static __inline__ __attribute__((host)) __attribute__((device)) ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w)
{
  ushort4 result;
  result.x = x;
  result.y = y;
  result.z = z;
  result.w = w;
  return result;
}

// Builds an int1 whose single x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) int1 make_int1(int x)
{
  int1 result;
  result.x = x;
  return result;
}

// Builds a uint1 whose single x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) uint1 make_uint1(unsigned int x)
{
  uint1 result;
  result.x = x;
  return result;
}

// Builds an int2 whose x and y members are set from the two arguments.
static __inline__ __attribute__((host)) __attribute__((device)) int2 make_int2(int x, int y)
{
  int2 result;
  result.x = x;
  result.y = y;
  return result;
}

// Builds a uint2 whose x and y members are set from the two arguments.
static __inline__ __attribute__((host)) __attribute__((device)) uint2 make_uint2(unsigned int x, unsigned int y)
{
  uint2 result;
  result.x = x;
  result.y = y;
  return result;
}

// Builds an int3 whose x, y and z members are set from the three arguments.
static __inline__ __attribute__((host)) __attribute__((device)) int3 make_int3(int x, int y, int z)
{
  int3 result;
  result.x = x;
  result.y = y;
  result.z = z;
  return result;
}

// Builds a uint3 whose x, y and z members are set from the three arguments.
static __inline__ __attribute__((host)) __attribute__((device)) uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z)
{
  uint3 result;
  result.x = x;
  result.y = y;
  result.z = z;
  return result;
}

// Builds an int4 whose x, y, z and w members are set from the four arguments.
static __inline__ __attribute__((host)) __attribute__((device)) int4 make_int4(int x, int y, int z, int w)
{
  int4 result;
  result.x = x;
  result.y = y;
  result.z = z;
  result.w = w;
  return result;
}

// Builds a uint4 whose x, y, z and w members are set from the four arguments.
static __inline__ __attribute__((host)) __attribute__((device)) uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w)
{
  uint4 result;
  result.x = x;
  result.y = y;
  result.z = z;
  result.w = w;
  return result;
}

// Builds a long1 whose single x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) long1 make_long1(long int x)
{
  long1 result;
  result.x = x;
  return result;
}

// Builds a ulong1 whose single x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) ulong1 make_ulong1(unsigned long int x)
{
  ulong1 result;
  result.x = x;
  return result;
}

// Builds a long2 whose x and y members are set from the two arguments.
static __inline__ __attribute__((host)) __attribute__((device)) long2 make_long2(long int x, long int y)
{
  long2 result;
  result.x = x;
  result.y = y;
  return result;
}

// Builds a ulong2 whose x and y members are set from the two arguments.
static __inline__ __attribute__((host)) __attribute__((device)) ulong2 make_ulong2(unsigned long int x, unsigned long int y)
{
  ulong2 result;
  result.x = x;
  result.y = y;
  return result;
}

// Builds a long3 whose x, y and z members are set from the three arguments.
static __inline__ __attribute__((host)) __attribute__((device)) long3 make_long3(long int x, long int y, long int z)
{
  long3 result;
  result.x = x;
  result.y = y;
  result.z = z;
  return result;
}

// Builds a ulong3 whose x, y and z members are set from the three arguments.
static __inline__ __attribute__((host)) __attribute__((device)) ulong3 make_ulong3(unsigned long int x, unsigned long int y, unsigned long int z)
{
  ulong3 result;
  result.x = x;
  result.y = y;
  result.z = z;
  return result;
}

// Builds a long4 whose x, y, z and w members are set from the four arguments.
static __inline__ __attribute__((host)) __attribute__((device)) long4 make_long4(long int x, long int y, long int z, long int w)
{
  long4 result;
  result.x = x;
  result.y = y;
  result.z = z;
  result.w = w;
  return result;
}

// Builds a ulong4 whose x, y, z and w members are set from the four arguments.
static __inline__ __attribute__((host)) __attribute__((device)) ulong4 make_ulong4(unsigned long int x, unsigned long int y, unsigned long int z, unsigned long int w)
{
  ulong4 result;
  result.x = x;
  result.y = y;
  result.z = z;
  result.w = w;
  return result;
}

// Builds a float1 whose single x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) float1 make_float1(float x)
{
  float1 result;
  result.x = x;
  return result;
}

// Builds a float2 whose x and y members are set from the two arguments.
static __inline__ __attribute__((host)) __attribute__((device)) float2 make_float2(float x, float y)
{
  float2 result;
  result.x = x;
  result.y = y;
  return result;
}

// Builds a float3 whose x, y and z members are set from the three arguments.
static __inline__ __attribute__((host)) __attribute__((device)) float3 make_float3(float x, float y, float z)
{
  float3 result;
  result.x = x;
  result.y = y;
  result.z = z;
  return result;
}

// Builds a float4 whose x, y, z and w members are set from the four arguments.
static __inline__ __attribute__((host)) __attribute__((device)) float4 make_float4(float x, float y, float z, float w)
{
  float4 result;
  result.x = x;
  result.y = y;
  result.z = z;
  result.w = w;
  return result;
}

// Builds a longlong1 whose single x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) longlong1 make_longlong1(long long int x)
{
  longlong1 result;
  result.x = x;
  return result;
}

// Builds a ulonglong1 whose single x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) ulonglong1 make_ulonglong1(unsigned long long int x)
{
  ulonglong1 result;
  result.x = x;
  return result;
}

// Builds a longlong2 whose x and y members are set from the two arguments.
static __inline__ __attribute__((host)) __attribute__((device)) longlong2 make_longlong2(long long int x, long long int y)
{
  longlong2 result;
  result.x = x;
  result.y = y;
  return result;
}

// Builds a ulonglong2 whose x and y members are set from the two arguments.
static __inline__ __attribute__((host)) __attribute__((device)) ulonglong2 make_ulonglong2(unsigned long long int x, unsigned long long int y)
{
  ulonglong2 result;
  result.x = x;
  result.y = y;
  return result;
}

// Builds a longlong3 whose x, y and z members are set from the three arguments.
static __inline__ __attribute__((host)) __attribute__((device)) longlong3 make_longlong3(long long int x, long long int y, long long int z)
{
  longlong3 result;
  result.x = x;
  result.y = y;
  result.z = z;
  return result;
}

// Builds a ulonglong3 whose x, y and z members are set from the three arguments.
static __inline__ __attribute__((host)) __attribute__((device)) ulonglong3 make_ulonglong3(unsigned long long int x, unsigned long long int y, unsigned long long int z)
{
  ulonglong3 result;
  result.x = x;
  result.y = y;
  result.z = z;
  return result;
}

// Builds a longlong4 whose x, y, z and w members are set from the four arguments.
static __inline__ __attribute__((host)) __attribute__((device)) longlong4 make_longlong4(long long int x, long long int y, long long int z, long long int w)
{
  longlong4 result;
  result.x = x;
  result.y = y;
  result.z = z;
  result.w = w;
  return result;
}

// Builds a ulonglong4 whose x, y, z and w members are set from the four arguments.
static __inline__ __attribute__((host)) __attribute__((device)) ulonglong4 make_ulonglong4(unsigned long long int x, unsigned long long int y, unsigned long long int z, unsigned long long int w)
{
  ulonglong4 result;
  result.x = x;
  result.y = y;
  result.z = z;
  result.w = w;
  return result;
}

// Builds a double1 whose single x member is set from the argument.
static __inline__ __attribute__((host)) __attribute__((device)) double1 make_double1(double x)
{
  double1 result;
  result.x = x;
  return result;
}

// Builds a double2 whose x and y members are set from the two arguments.
static __inline__ __attribute__((host)) __attribute__((device)) double2 make_double2(double x, double y)
{
  double2 result;
  result.x = x;
  result.y = y;
  return result;
}

// Builds a double3 whose x, y and z members are set from the three arguments.
static __inline__ __attribute__((host)) __attribute__((device)) double3 make_double3(double x, double y, double z)
{
  double3 result;
  result.x = x;
  result.y = y;
  result.z = z;
  return result;
}

// Builds a double4 whose x, y, z and w members are set from the four arguments.
static __inline__ __attribute__((host)) __attribute__((device)) double4 make_double4(double x, double y, double z, double w)
{
  double4 result;
  result.x = x;
  result.y = y;
  result.z = z;
  result.w = w;
  return result;
}
# 173 "/usr/local/cuda-11.7/include/vector_functions.h" 2 3
# 102 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3
# 115 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/common_functions.h" 1 3
# 116 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3
# 1 "/usr/local/cuda-11.7/include/cuda_surface_types.h" 1 3
# 74 "/usr/local/cuda-11.7/include/cuda_surface_types.h" 3
// Typed surface reference. Both constructors only fill in the inherited
// channelDesc member of surfaceReference.
template<class T, int dim = 1>
struct __attribute__((device_builtin_surface_type)) surface : public surfaceReference
{
  // Default: use the channel descriptor created for element type T.
  __attribute__((host)) surface(void)
  {
    this->channelDesc = cudaCreateChannelDesc<T>();
  }

  // Use a caller-supplied channel descriptor.
  __attribute__((host)) surface(struct cudaChannelFormatDesc desc)
  {
    this->channelDesc = desc;
  }
};

// Partial specialization for untyped (void) surfaces: the only constructor
// sets the inherited channelDesc from cudaCreateChannelDesc<void>().
template<int dim>
struct __attribute__((device_builtin_surface_type)) surface<void, dim> : public surfaceReference
{
  __attribute__((host)) surface(void)
  {
    this->channelDesc = cudaCreateChannelDesc<void>();
  }
};
# 117 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3
# 1 "/usr/local/cuda-11.7/include/cuda_texture_types.h" 1 3
# 74 "/usr/local/cuda-11.7/include/cuda_texture_types.h" 3
// Typed texture reference. The constructors initialise the inherited
// textureReference members: normalized, filterMode, the three addressMode
// slots, channelDesc and sRGB (always set to 0).
template<class T, int texType = 0x01, enum cudaTextureReadMode mode = cudaReadModeElementType>
struct __attribute__((device_builtin_texture_type)) texture : public textureReference
{
  // Channel descriptor is derived from the element type T.
  __attribute__((host)) texture(int norm = 0,
                   enum cudaTextureFilterMode fMode = cudaFilterModePoint,
                   enum cudaTextureAddressMode aMode = cudaAddressModeClamp)
  {
    this->normalized = norm;
    this->filterMode = fMode;
    // The same address mode is applied to all three slots.
    for (int axis = 0; axis < 3; ++axis) {
      this->addressMode[axis] = aMode;
    }
    this->channelDesc = cudaCreateChannelDesc<T>();
    this->sRGB = 0;
  }

  // Channel descriptor is supplied explicitly by the caller.
  __attribute__((host)) texture(int norm,
                   enum cudaTextureFilterMode fMode,
                   enum cudaTextureAddressMode aMode,
                   struct cudaChannelFormatDesc desc)
  {
    this->normalized = norm;
    this->filterMode = fMode;
    // The same address mode is applied to all three slots.
    for (int axis = 0; axis < 3; ++axis) {
      this->addressMode[axis] = aMode;
    }
    this->channelDesc = desc;
    this->sRGB = 0;
  }
};
# 118 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3
# 1 "/usr/local/cuda-11.7/include/crt/device_functions.h" 1 3
# 119 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3
# 1 "/usr/local/cuda-11.7/include/device_launch_parameters.h" 1 3
# 120 "/usr/local/cuda-11.7/include/cuda_runtime.h" 2 3
# 201 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Typed overload: casts the kernel pointer to const void* and forwards every
// launch argument unchanged to the global ::cudaLaunchKernel.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaLaunchKernel(
  const T *func,
  dim3 gridDim,
  dim3 blockDim,
  void **args,
  size_t sharedMem = 0,
  cudaStream_t stream = 0
)
{
    const void *entry = (const void *)func;
    return ::cudaLaunchKernel(entry, gridDim, blockDim, args, sharedMem, stream);
}
# 263 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Typed overload: casts the kernel pointer to const void* and forwards every
// launch argument unchanged to the global ::cudaLaunchCooperativeKernel.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaLaunchCooperativeKernel(
  const T *func,
  dim3 gridDim,
  dim3 blockDim,
  void **args,
  size_t sharedMem = 0,
  cudaStream_t stream = 0
)
{
    const void *entry = (const void *)func;
    return ::cudaLaunchCooperativeKernel(entry, gridDim, blockDim, args, sharedMem, stream);
}
# 307 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Two-argument overload of cudaEventCreate: delegates to
// ::cudaEventCreateWithFlags with the given flags.
static __inline__ __attribute__((host)) cudaError_t cudaEventCreate(
  cudaEvent_t *event,
  unsigned int flags
)
{
  const cudaError_t status = ::cudaEventCreateWithFlags(event, flags);
  return status;
}
# 372 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Three-argument overload of cudaMallocHost: delegates to ::cudaHostAlloc,
// passing the flags through.
static __inline__ __attribute__((host)) cudaError_t cudaMallocHost(
  void **ptr,
  size_t size,
  unsigned int flags
)
{
  const cudaError_t status = ::cudaHostAlloc(ptr, size, flags);
  return status;
}

// Typed overload: reinterprets the T** output slot as void** and forwards to
// the global ::cudaHostAlloc.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaHostAlloc(
  T **ptr,
  size_t size,
  unsigned int flags
)
{
  void **untypedSlot = (void**)(void*)ptr;
  return ::cudaHostAlloc(untypedSlot, size, flags);
}

// Typed overload: reinterprets the T** output slot as void** and forwards to
// the global ::cudaHostGetDevicePointer.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaHostGetDevicePointer(
  T **pDevice,
  void *pHost,
  unsigned int flags
)
{
  void **untypedSlot = (void**)(void*)pDevice;
  return ::cudaHostGetDevicePointer(untypedSlot, pHost, flags);
}
# 501 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Typed overload: reinterprets the T** output slot as void** and forwards to
// the global ::cudaMallocManaged. flags defaults to 0x01.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaMallocManaged(
  T **devPtr,
  size_t size,
  unsigned int flags = 0x01
)
{
  void **untypedSlot = (void**)(void*)devPtr;
  return ::cudaMallocManaged(untypedSlot, size, flags);
}
# 591 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Typed overload: casts devPtr to void* and forwards to the global
// ::cudaStreamAttachMemAsync. length defaults to 0 and flags to 0x04.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaStreamAttachMemAsync(
  cudaStream_t stream,
  T *devPtr,
  size_t length = 0,
  unsigned int flags = 0x04
)
{
  void *untypedPtr = (void*)devPtr;
  return ::cudaStreamAttachMemAsync(stream, untypedPtr, length, flags);
}

// Typed overload: reinterprets the T** output slot as void** and forwards to
// the global ::cudaMalloc.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaMalloc(
  T **devPtr,
  size_t size
)
{
  void **untypedSlot = (void**)(void*)devPtr;
  return ::cudaMalloc(untypedSlot, size);
}

// Typed overload: reinterprets the T** output slot as void** and calls the
// cudaMallocHost overload taking (void**, size_t, unsigned int).
// flags defaults to 0.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaMallocHost(
  T **ptr,
  size_t size,
  unsigned int flags = 0
)
{
  void **untypedSlot = (void**)(void*)ptr;
  return cudaMallocHost(untypedSlot, size, flags);
}

// Typed overload: reinterprets the T** output slot as void** and forwards to
// the global ::cudaMallocPitch.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaMallocPitch(
  T **devPtr,
  size_t *pitch,
  size_t width,
  size_t height
)
{
  void **untypedSlot = (void**)(void*)devPtr;
  return ::cudaMallocPitch(untypedSlot, pitch, width, height);
}
# 641 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Pool-taking overload of cudaMallocAsync: delegates to
// ::cudaMallocFromPoolAsync with the same arguments.
static __inline__ __attribute__((host)) cudaError_t cudaMallocAsync(
  void **ptr,
  size_t size,
  cudaMemPool_t memPool,
  cudaStream_t stream
)
{
  const cudaError_t status = ::cudaMallocFromPoolAsync(ptr, size, memPool, stream);
  return status;
}

// Typed, pool-taking overload: reinterprets the T** output slot as void** and
// forwards to the global ::cudaMallocFromPoolAsync.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaMallocAsync(
  T **ptr,
  size_t size,
  cudaMemPool_t memPool,
  cudaStream_t stream
)
{
  void **untypedSlot = (void**)(void*)ptr;
  return ::cudaMallocFromPoolAsync(untypedSlot, size, memPool, stream);
}

// Typed overload without a pool: reinterprets the T** output slot as void**
// and forwards to the global ::cudaMallocAsync.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaMallocAsync(
  T **ptr,
  size_t size,
  cudaStream_t stream
)
{
  void **untypedSlot = (void**)(void*)ptr;
  return ::cudaMallocAsync(untypedSlot, size, stream);
}

// Typed overload: reinterprets the T** output slot as void** and forwards to
// the global ::cudaMallocFromPoolAsync.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaMallocFromPoolAsync(
  T **ptr,
  size_t size,
  cudaMemPool_t memPool,
  cudaStream_t stream
)
{
  void **untypedSlot = (void**)(void*)ptr;
  return ::cudaMallocFromPoolAsync(untypedSlot, size, memPool, stream);
}
# 720 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaMemcpyToSymbol. offset defaults to 0 and kind to
// cudaMemcpyHostToDevice.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaMemcpyToSymbol(
  const T &symbol,
  const void *src,
        size_t count,
        size_t offset = 0,
        enum cudaMemcpyKind kind = cudaMemcpyHostToDevice
)
{
  const void *symbolAddr = (const void*)&symbol;
  return ::cudaMemcpyToSymbol(symbolAddr, src, count, offset, kind);
}
# 774 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaMemcpyToSymbolAsync. offset defaults to 0, kind to
// cudaMemcpyHostToDevice and stream to 0.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaMemcpyToSymbolAsync(
  const T &symbol,
  const void *src,
        size_t count,
        size_t offset = 0,
        enum cudaMemcpyKind kind = cudaMemcpyHostToDevice,
        cudaStream_t stream = 0
)
{
  const void *symbolAddr = (const void*)&symbol;
  return ::cudaMemcpyToSymbolAsync(symbolAddr, src, count, offset, kind, stream);
}
# 822 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaMemcpyFromSymbol. offset defaults to 0 and kind to
// cudaMemcpyDeviceToHost.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaMemcpyFromSymbol(
        void *dst,
  const T &symbol,
        size_t count,
        size_t offset = 0,
        enum cudaMemcpyKind kind = cudaMemcpyDeviceToHost
)
{
  const void *symbolAddr = (const void*)&symbol;
  return ::cudaMemcpyFromSymbol(dst, symbolAddr, count, offset, kind);
}
# 876 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaMemcpyFromSymbolAsync. offset defaults to 0, kind to
// cudaMemcpyDeviceToHost and stream to 0.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaMemcpyFromSymbolAsync(
        void *dst,
  const T &symbol,
        size_t count,
        size_t offset = 0,
        enum cudaMemcpyKind kind = cudaMemcpyDeviceToHost,
        cudaStream_t stream = 0
)
{
  const void *symbolAddr = (const void*)&symbol;
  return ::cudaMemcpyFromSymbolAsync(dst, symbolAddr, count, offset, kind, stream);
}
# 945 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaGraphAddMemcpyNodeToSymbol; all other arguments are
// forwarded unchanged.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaGraphAddMemcpyNodeToSymbol(
    cudaGraphNode_t *pGraphNode,
    cudaGraph_t graph,
    const cudaGraphNode_t *pDependencies,
    size_t numDependencies,
    const T &symbol,
    const void* src,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind)
{
  const void *symbolAddr = (const void*)&symbol;
  return ::cudaGraphAddMemcpyNodeToSymbol(
      pGraphNode, graph, pDependencies, numDependencies,
      symbolAddr, src, count, offset, kind);
}
# 1016 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaGraphAddMemcpyNodeFromSymbol; all other arguments are
// forwarded unchanged.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaGraphAddMemcpyNodeFromSymbol(
    cudaGraphNode_t* pGraphNode,
    cudaGraph_t graph,
    const cudaGraphNode_t* pDependencies,
    size_t numDependencies,
    void* dst,
    const T &symbol,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind)
{
  const void *symbolAddr = (const void*)&symbol;
  return ::cudaGraphAddMemcpyNodeFromSymbol(
      pGraphNode, graph, pDependencies, numDependencies,
      dst, symbolAddr, count, offset, kind);
}
# 1067 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaGraphMemcpyNodeSetParamsToSymbol.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaGraphMemcpyNodeSetParamsToSymbol(
    cudaGraphNode_t node,
    const T &symbol,
    const void* src,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind)
{
  const void *symbolAddr = (const void*)&symbol;
  return ::cudaGraphMemcpyNodeSetParamsToSymbol(node, symbolAddr, src, count, offset, kind);
}
# 1115 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaGraphMemcpyNodeSetParamsFromSymbol.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaGraphMemcpyNodeSetParamsFromSymbol(
    cudaGraphNode_t node,
    void* dst,
    const T &symbol,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind)
{
  const void *symbolAddr = (const void*)&symbol;
  return ::cudaGraphMemcpyNodeSetParamsFromSymbol(node, dst, symbolAddr, count, offset, kind);
}
# 1173 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaGraphExecMemcpyNodeSetParamsToSymbol.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaGraphExecMemcpyNodeSetParamsToSymbol(
    cudaGraphExec_t hGraphExec,
    cudaGraphNode_t node,
    const T &symbol,
    const void* src,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind)
{
    const void *symbolAddr = (const void*)&symbol;
    return ::cudaGraphExecMemcpyNodeSetParamsToSymbol(
        hGraphExec, node, symbolAddr, src, count, offset, kind);
}
# 1232 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaGraphExecMemcpyNodeSetParamsFromSymbol.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaGraphExecMemcpyNodeSetParamsFromSymbol(
    cudaGraphExec_t hGraphExec,
    cudaGraphNode_t node,
    void* dst,
    const T &symbol,
    size_t count,
    size_t offset,
    enum cudaMemcpyKind kind)
{
  const void *symbolAddr = (const void*)&symbol;
  return ::cudaGraphExecMemcpyNodeSetParamsFromSymbol(
      hGraphExec, node, dst, symbolAddr, count, offset, kind);
}
# 1271 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Typed overload: wraps `objectToWrap` in a user object whose destroy callback
// casts the stored void* back to T* and deletes it. Forwards to the global
// ::cudaUserObjectCreate.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaUserObjectCreate(
    cudaUserObject_t *object_out,
    T *objectToWrap,
    unsigned int initialRefcount,
    unsigned int flags)
{
    // Capture-less lambda so it can convert to the plain function pointer the
    // runtime call takes.
    auto destroyWrapped = [](void *vpObj) { delete reinterpret_cast<T *>(vpObj); };
    return ::cudaUserObjectCreate(object_out, objectToWrap, destroyWrapped, initialRefcount, flags);
}

// Overload taking the flags as the cudaUserObjectFlags enum: converts them to
// unsigned int and calls the unsigned-int overload of cudaUserObjectCreate.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaUserObjectCreate(
    cudaUserObject_t *object_out,
    T *objectToWrap,
    unsigned int initialRefcount,
    cudaUserObjectFlags flags)
{
    const unsigned int rawFlags = (unsigned int)flags;
    return cudaUserObjectCreate(object_out, objectToWrap, initialRefcount, rawFlags);
}
# 1321 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaGetSymbolAddress.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaGetSymbolAddress(
        void **devPtr,
  const T &symbol
)
{
  const void *symbolAddr = (const void*)&symbol;
  return ::cudaGetSymbolAddress(devPtr, symbolAddr);
}
# 1353 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Reference-taking overload: passes the address of `symbol` (as const void*)
// to the global ::cudaGetSymbolSize.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaGetSymbolSize(
        size_t *size,
  const T &symbol
)
{
  const void *symbolAddr = (const void*)&symbol;
  return ::cudaGetSymbolSize(size, symbolAddr);
}
# 1397 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload: forwards to the global ::cudaBindTexture, passing
// the texture and channel descriptor by address. size defaults to
// (2147483647 *2U +1U).
template<class T, int dim, enum cudaTextureReadMode readMode>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaBindTexture(
        size_t *offset,
  const struct texture<T, dim, readMode> &tex,
  const void *devPtr,
  const struct cudaChannelFormatDesc &desc,
        size_t size = (2147483647 *2U +1U)
)
{
  const cudaError_t status = ::cudaBindTexture(offset, &tex, devPtr, &desc, size);
  return status;
}
# 1443 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload without an explicit descriptor: uses the
// texture's own channelDesc and calls the descriptor-taking cudaBindTexture
// overload. size defaults to (2147483647 *2U +1U).
template<class T, int dim, enum cudaTextureReadMode readMode>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaBindTexture(
        size_t *offset,
  const struct texture<T, dim, readMode> &tex,
  const void *devPtr,
        size_t size = (2147483647 *2U +1U)
)
{
  const cudaError_t status = cudaBindTexture(offset, tex, devPtr, tex.channelDesc, size);
  return status;
}
# 1500 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload: forwards to the global ::cudaBindTexture2D,
// passing the texture and channel descriptor by address.
template<class T, int dim, enum cudaTextureReadMode readMode>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaBindTexture2D(
        size_t *offset,
  const struct texture<T, dim, readMode> &tex,
  const void *devPtr,
  const struct cudaChannelFormatDesc &desc,
  size_t width,
  size_t height,
  size_t pitch
)
{
  const cudaError_t status = ::cudaBindTexture2D(offset, &tex, devPtr, &desc, width, height, pitch);
  return status;
}
# 1559 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload without an explicit descriptor: forwards to the
// global ::cudaBindTexture2D using the address of the texture's own
// channelDesc.
template<class T, int dim, enum cudaTextureReadMode readMode>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaBindTexture2D(
        size_t *offset,
  const struct texture<T, dim, readMode> &tex,
  const void *devPtr,
  size_t width,
  size_t height,
  size_t pitch
)
{
  const cudaError_t status =
      ::cudaBindTexture2D(offset, &tex, devPtr, &tex.channelDesc, width, height, pitch);
  return status;
}
# 1602 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload: forwards to the global ::cudaBindTextureToArray,
// passing the texture and channel descriptor by address.
template<class T, int dim, enum cudaTextureReadMode readMode>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaBindTextureToArray(
  const struct texture<T, dim, readMode> &tex,
  cudaArray_const_t array,
  const struct cudaChannelFormatDesc &desc
)
{
  const cudaError_t status = ::cudaBindTextureToArray(&tex, array, &desc);
  return status;
}
# 1641 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload without an explicit descriptor: queries the
// array's channel descriptor with ::cudaGetChannelDesc and, if that succeeds,
// binds through the descriptor-taking overload. A failed query's error code is
// returned as-is.
template<class T, int dim, enum cudaTextureReadMode readMode>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaBindTextureToArray(
  const struct texture<T, dim, readMode> &tex,
  cudaArray_const_t array
)
{
  struct cudaChannelFormatDesc arrayDesc;
  cudaError_t status = ::cudaGetChannelDesc(&arrayDesc, array);

  if (status == cudaSuccess) {
    status = cudaBindTextureToArray(tex, array, arrayDesc);
  }
  return status;
}
# 1683 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload: forwards to the global
// ::cudaBindTextureToMipmappedArray, passing the texture and channel
// descriptor by address.
template<class T, int dim, enum cudaTextureReadMode readMode>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaBindTextureToMipmappedArray(
  const struct texture<T, dim, readMode> &tex,
  cudaMipmappedArray_const_t mipmappedArray,
  const struct cudaChannelFormatDesc &desc
)
{
  const cudaError_t status = ::cudaBindTextureToMipmappedArray(&tex, mipmappedArray, &desc);
  return status;
}
# 1722 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload without an explicit descriptor: fetches level 0 of
// the mipmapped array, reads that level's channel descriptor, then binds
// through the descriptor-taking overload. The first failing step's error code
// is returned and later steps are skipped.
template<class T, int dim, enum cudaTextureReadMode readMode>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaBindTextureToMipmappedArray(
  const struct texture<T, dim, readMode> &tex,
  cudaMipmappedArray_const_t mipmappedArray
)
{
  cudaArray_t baseLevel;
  cudaError_t status = ::cudaGetMipmappedArrayLevel(&baseLevel, mipmappedArray, 0);

  if (status == cudaSuccess) {
      struct cudaChannelFormatDesc levelDesc;
      status = ::cudaGetChannelDesc(&levelDesc, baseLevel);

      if (status == cudaSuccess) {
          status = cudaBindTextureToMipmappedArray(tex, mipmappedArray, levelDesc);
      }
  }
  return status;
}
# 1765 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload: forwards the texture's address to the global
// ::cudaUnbindTexture.
template<class T, int dim, enum cudaTextureReadMode readMode>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaUnbindTexture(
  const struct texture<T, dim, readMode> &tex
)
{
  const cudaError_t status = ::cudaUnbindTexture(&tex);
  return status;
}
# 1801 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload: forwards the texture's address to the global
// ::cudaGetTextureAlignmentOffset.
template<class T, int dim, enum cudaTextureReadMode readMode>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaGetTextureAlignmentOffset(
        size_t *offset,
  const struct texture<T, dim, readMode> &tex
)
{
  const cudaError_t status = ::cudaGetTextureAlignmentOffset(offset, &tex);
  return status;
}
# 1853 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Typed overload: casts the function pointer to const void* and forwards to
// the global ::cudaFuncSetCacheConfig.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaFuncSetCacheConfig(
  T *func,
  enum cudaFuncCache cacheConfig
)
{
  const void *entry = (const void*)func;
  return ::cudaFuncSetCacheConfig(entry, cacheConfig);
}

// Typed overload: casts the function pointer to const void* and forwards to
// the global ::cudaFuncSetSharedMemConfig.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaFuncSetSharedMemConfig(
  T *func,
  enum cudaSharedMemConfig config
)
{
  const void *entry = (const void*)func;
  return ::cudaFuncSetSharedMemConfig(entry, config);
}
# 1901 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Typed overload: casts `func` to const void* and calls the global
// ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags with flags 0x00.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessor(
    int *numBlocks,
    T func,
    int blockSize,
    size_t dynamicSMemSize)
{
    const void *entry = (const void*)func;
    return ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
        numBlocks, entry, blockSize, dynamicSMemSize, 0x00);
}
# 1953 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Typed overload: casts `func` to const void* and forwards to the global
// ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
    int *numBlocks,
    T func,
    int blockSize,
    size_t dynamicSMemSize,
    unsigned int flags)
{
    const void *entry = (const void*)func;
    return ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
        numBlocks, entry, blockSize, dynamicSMemSize, flags);
}


// Function object that ignores its int argument and always returns the size_t
// value it was constructed with.
class __cudaOccupancyB2DHelper {
public:
  inline __attribute__((host)) __attribute__((device)) __cudaOccupancyB2DHelper(size_t n_)
    : fixedSize(n_)
  {
  }

  // The argument is unnamed and unused; the stored value is returned as-is.
  inline __attribute__((host)) __attribute__((device)) size_t operator()(int)
  {
      return fixedSize;
  }

private:
  size_t fixedSize;
};
# 2023 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Searches candidate block sizes for `func` and reports the one giving the
// highest occupancy (block size * active blocks per multiprocessor).
//
// Parameters:
//   minGridSize                - out: numBlocks * multiProcessorCount for the
//                                chosen block size.
//   blockSize                  - out: the chosen block size (0 if no candidate
//                                produced a positive occupancy).
//   func                       - kernel to query; must be non-null.
//   blockSizeToDynamicSMemSize - callable invoked with each candidate block
//                                size; its result is used as the dynamic
//                                shared memory size for that candidate.
//   blockSizeLimit             - upper bound on the block size; 0 means "use
//                                the device maximum".
//   flags                      - passed through to
//                                cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.
//
// Returns cudaErrorInvalidValue if minGridSize, blockSize or func is null;
// otherwise the first non-success status from any runtime query, or the status
// of the last query performed. The outputs are written only when the final
// assignments are reached (i.e. not on an early error return).
template<typename UnaryFunction, class T>
static __inline__ __attribute__((host)) __attribute__((device)) cudaError_t cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags(
    int *minGridSize,
    int *blockSize,
    T func,
    UnaryFunction blockSizeToDynamicSMemSize,
    int blockSizeLimit = 0,
    unsigned int flags = 0)
{
    cudaError_t status;

    // Device and function properties queried below.

    int device;
    struct cudaFuncAttributes attr;


    int maxThreadsPerMultiProcessor;
    int warpSize;
    int devMaxThreadsPerBlock;
    int multiProcessorCount;
    int funcMaxThreadsPerBlock;
    int occupancyLimit;
    int granularity;

    // Best candidate found so far.

    int maxBlockSize = 0;
    int numBlocks = 0;
    int maxOccupancy = 0;

    // Per-iteration scratch values.

    int blockSizeToTryAligned;
    int blockSizeToTry;
    int blockSizeLimitAligned;
    int occupancyInBlocks;
    int occupancyInThreads;
    size_t dynamicSMemSize;

    // Reject null outputs or a null kernel before touching the runtime.

    if (!minGridSize || !blockSize || !func) {
        return cudaErrorInvalidValue;
    }

    // Gather device attributes for the current device; any failure aborts.

    status = ::cudaGetDevice(&device);
    if (status != cudaSuccess) {
        return status;
    }

    status = cudaDeviceGetAttribute(
        &maxThreadsPerMultiProcessor,
        cudaDevAttrMaxThreadsPerMultiProcessor,
        device);
    if (status != cudaSuccess) {
        return status;
    }

    status = cudaDeviceGetAttribute(
        &warpSize,
        cudaDevAttrWarpSize,
        device);
    if (status != cudaSuccess) {
        return status;
    }

    status = cudaDeviceGetAttribute(
        &devMaxThreadsPerBlock,
        cudaDevAttrMaxThreadsPerBlock,
        device);
    if (status != cudaSuccess) {
        return status;
    }

    status = cudaDeviceGetAttribute(
        &multiProcessorCount,
        cudaDevAttrMultiProcessorCount,
        device);
    if (status != cudaSuccess) {
        return status;
    }

    status = cudaFuncGetAttributes(&attr, func);
    if (status != cudaSuccess) {
        return status;
    }

    funcMaxThreadsPerBlock = attr.maxThreadsPerBlock;

    // The search stops early once occupancy reaches the per-multiprocessor
    // thread limit; candidates step down in units of the warp size.

    occupancyLimit = maxThreadsPerMultiProcessor;
    granularity = warpSize;

    // Clamp the limit: 0 means "device max", and it may exceed neither the
    // device's nor the function's maximum threads per block.
    if (blockSizeLimit == 0) {
        blockSizeLimit = devMaxThreadsPerBlock;
    }

    if (devMaxThreadsPerBlock < blockSizeLimit) {
        blockSizeLimit = devMaxThreadsPerBlock;
    }

    if (funcMaxThreadsPerBlock < blockSizeLimit) {
        blockSizeLimit = funcMaxThreadsPerBlock;
    }

    // Round the limit up to the next multiple of granularity.
    blockSizeLimitAligned = ((blockSizeLimit + (granularity - 1)) / granularity) * granularity;

    for (blockSizeToTryAligned = blockSizeLimitAligned; blockSizeToTryAligned > 0; blockSizeToTryAligned -= granularity) {

        // The rounded-up first candidate may exceed the real limit; cap it.

        if (blockSizeLimit < blockSizeToTryAligned) {
            blockSizeToTry = blockSizeLimit;
        } else {
            blockSizeToTry = blockSizeToTryAligned;
        }

        dynamicSMemSize = blockSizeToDynamicSMemSize(blockSizeToTry);

        status = cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
            &occupancyInBlocks,
            func,
            blockSizeToTry,
            dynamicSMemSize,
            flags);

        if (status != cudaSuccess) {
            return status;
        }

        occupancyInThreads = blockSizeToTry * occupancyInBlocks;

        // Strict '>' keeps the earlier (larger) block size on ties.
        if (occupancyInThreads > maxOccupancy) {
            maxBlockSize = blockSizeToTry;
            numBlocks = occupancyInBlocks;
            maxOccupancy = occupancyInThreads;
        }

        // Occupancy has reached occupancyLimit; stop searching.

        if (occupancyLimit == maxOccupancy) {
            break;
        }
    }

    // Publish the best candidate found.

    *minGridSize = numBlocks * multiProcessorCount;
    *blockSize = maxBlockSize;

    return status;
}
# 2219 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Convenience overload: calls
// cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags with flags 0x00.
// blockSizeLimit defaults to 0.
template<typename UnaryFunction, class T>
static __inline__ __attribute__((host)) __attribute__((device)) cudaError_t cudaOccupancyMaxPotentialBlockSizeVariableSMem(
    int *minGridSize,
    int *blockSize,
    T func,
    UnaryFunction blockSizeToDynamicSMemSize,
    int blockSizeLimit = 0)
{
    const cudaError_t status = cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags(
        minGridSize, blockSize, func, blockSizeToDynamicSMemSize, blockSizeLimit, 0x00);
    return status;
}
# 2265 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Fixed-shared-memory convenience overload: wraps dynamicSMemSize in a
// __cudaOccupancyB2DHelper (which returns that value for every block size) and
// calls cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags with flags
// 0x00. dynamicSMemSize and blockSizeLimit default to 0.
template<class T>
static __inline__ __attribute__((host)) __attribute__((device)) cudaError_t cudaOccupancyMaxPotentialBlockSize(
    int *minGridSize,
    int *blockSize,
    T func,
    size_t dynamicSMemSize = 0,
    int blockSizeLimit = 0)
{
  __cudaOccupancyB2DHelper constantSMem(dynamicSMemSize);
  return cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags(
      minGridSize, blockSize, func, constantSMem, blockSizeLimit, 0x00);
}
# 2303 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Typed overload: casts `func` to const void* and forwards to the global
// ::cudaOccupancyAvailableDynamicSMemPerBlock.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaOccupancyAvailableDynamicSMemPerBlock(
    size_t *dynamicSmemSize,
    T func,
    int numBlocks,
    int blockSize)
{
    const void *entry = (const void*)func;
    return ::cudaOccupancyAvailableDynamicSMemPerBlock(dynamicSmemSize, entry, numBlocks, blockSize);
}
# 2362 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Fixed-shared-memory overload with flags: wraps dynamicSMemSize in a
// __cudaOccupancyB2DHelper (which returns that value for every block size) and
// calls cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags.
// dynamicSMemSize, blockSizeLimit and flags all default to 0.
template<class T>
static __inline__ __attribute__((host)) __attribute__((device)) cudaError_t cudaOccupancyMaxPotentialBlockSizeWithFlags(
    int *minGridSize,
    int *blockSize,
    T func,
    size_t dynamicSMemSize = 0,
    int blockSizeLimit = 0,
    unsigned int flags = 0)
{
    __cudaOccupancyB2DHelper constantSMem(dynamicSMemSize);
    return cudaOccupancyMaxPotentialBlockSizeVariableSMemWithFlags(
        minGridSize, blockSize, func, constantSMem, blockSizeLimit, flags);
}
# 2405 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Typed overload: casts the function pointer to const void* and forwards to
// the global ::cudaFuncGetAttributes.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaFuncGetAttributes(
  struct cudaFuncAttributes *attr,
  T *entry
)
{
  const void *untypedEntry = (const void*)entry;
  return ::cudaFuncGetAttributes(attr, untypedEntry);
}
# 2469 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Typed overload: casts the function pointer to const void* and forwards to
// the global ::cudaFuncSetAttribute.
template<class T>
static __inline__ __attribute__((host)) cudaError_t cudaFuncSetAttribute(
  T *entry,
  enum cudaFuncAttribute attr,
  int value
)
{
  const void *untypedEntry = (const void*)entry;
  return ::cudaFuncSetAttribute(untypedEntry, attr, value);
}
# 2501 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload: forwards to the global ::cudaBindSurfaceToArray,
// passing the surface and channel descriptor by address.
template<class T, int dim>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaBindSurfaceToArray(
  const struct surface<T, dim> &surf,
  cudaArray_const_t array,
  const struct cudaChannelFormatDesc &desc
)
{
  const cudaError_t status = ::cudaBindSurfaceToArray(&surf, array, &desc);
  return status;
}
# 2532 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
// Deprecated typed overload without an explicit descriptor: queries the
// array's channel descriptor with ::cudaGetChannelDesc and, if that succeeds,
// binds through the descriptor-taking overload. A failed query's error code is
// returned as-is.
template<class T, int dim>
static __attribute__((deprecated)) __inline__ __attribute__((host)) cudaError_t cudaBindSurfaceToArray(
  const struct surface<T, dim> &surf,
  cudaArray_const_t array
)
{
  struct cudaChannelFormatDesc arrayDesc;
  cudaError_t status = ::cudaGetChannelDesc(&arrayDesc, array);

  if (status == cudaSuccess) {
    status = cudaBindSurfaceToArray(surf, array, arrayDesc);
  }
  return status;
}
# 2553 "/usr/local/cuda-11.7/include/cuda_runtime.h" 3
#pragma GCC diagnostic pop
# 112 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 125 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/host_runtime.h" 1 3
# 58 "/usr/local/cuda-11.7/include/crt/host_runtime.h" 3
# 1 "/usr/local/cuda-11.7/include/builtin_types.h" 1 3
# 59 "/usr/local/cuda-11.7/include/crt/host_runtime.h" 2 3
# 1 "/usr/local/cuda-11.7/include/crt/storage_class.h" 1 3
# 60 "/usr/local/cuda-11.7/include/crt/host_runtime.h" 2 3
# 126 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 151 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_libdevice_declares.h" 1 3
# 14 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_libdevice_declares.h" 3
extern "C" {
# 24 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_libdevice_declares.h" 3
__attribute__((device)) int __nv_abs(int __a);
__attribute__((device)) double __nv_acos(double __a);
__attribute__((device)) float __nv_acosf(float __a);
__attribute__((device)) double __nv_acosh(double __a);
__attribute__((device)) float __nv_acoshf(float __a);
__attribute__((device)) double __nv_asin(double __a);
__attribute__((device)) float __nv_asinf(float __a);
__attribute__((device)) double __nv_asinh(double __a);
__attribute__((device)) float __nv_asinhf(float __a);
__attribute__((device)) double __nv_atan2(double __a, double __b);
__attribute__((device)) float __nv_atan2f(float __a, float __b);
__attribute__((device)) double __nv_atan(double __a);
__attribute__((device)) float __nv_atanf(float __a);
__attribute__((device)) double __nv_atanh(double __a);
__attribute__((device)) float __nv_atanhf(float __a);
__attribute__((device)) int __nv_brev(int __a);
__attribute__((device)) long long __nv_brevll(long long __a);
__attribute__((device)) int __nv_byte_perm(int __a, int __b, int __c);
__attribute__((device)) double __nv_cbrt(double __a);
__attribute__((device)) float __nv_cbrtf(float __a);
__attribute__((device)) double __nv_ceil(double __a);
__attribute__((device)) float __nv_ceilf(float __a);
__attribute__((device)) int __nv_clz(int __a);
__attribute__((device)) int __nv_clzll(long long __a);
__attribute__((device)) double __nv_copysign(double __a, double __b);
__attribute__((device)) float __nv_copysignf(float __a, float __b);
__attribute__((device)) double __nv_cos(double __a);
__attribute__((device)) float __nv_cosf(float __a);
__attribute__((device)) double __nv_cosh(double __a);
__attribute__((device)) float __nv_coshf(float __a);
__attribute__((device)) double __nv_cospi(double __a);
__attribute__((device)) float __nv_cospif(float __a);
__attribute__((device)) double __nv_cyl_bessel_i0(double __a);
__attribute__((device)) float __nv_cyl_bessel_i0f(float __a);
__attribute__((device)) double __nv_cyl_bessel_i1(double __a);
__attribute__((device)) float __nv_cyl_bessel_i1f(float __a);
__attribute__((device)) double __nv_dadd_rd(double __a, double __b);
__attribute__((device)) double __nv_dadd_rn(double __a, double __b);
__attribute__((device)) double __nv_dadd_ru(double __a, double __b);
__attribute__((device)) double __nv_dadd_rz(double __a, double __b);
__attribute__((device)) double __nv_ddiv_rd(double __a, double __b);
__attribute__((device)) double __nv_ddiv_rn(double __a, double __b);
__attribute__((device)) double __nv_ddiv_ru(double __a, double __b);
__attribute__((device)) double __nv_ddiv_rz(double __a, double __b);
__attribute__((device)) double __nv_dmul_rd(double __a, double __b);
__attribute__((device)) double __nv_dmul_rn(double __a, double __b);
__attribute__((device)) double __nv_dmul_ru(double __a, double __b);
__attribute__((device)) double __nv_dmul_rz(double __a, double __b);
__attribute__((device)) float __nv_double2float_rd(double __a);
__attribute__((device)) float __nv_double2float_rn(double __a);
__attribute__((device)) float __nv_double2float_ru(double __a);
__attribute__((device)) float __nv_double2float_rz(double __a);
__attribute__((device)) int __nv_double2hiint(double __a);
__attribute__((device)) int __nv_double2int_rd(double __a);
__attribute__((device)) int __nv_double2int_rn(double __a);
__attribute__((device)) int __nv_double2int_ru(double __a);
__attribute__((device)) int __nv_double2int_rz(double __a);
__attribute__((device)) long long __nv_double2ll_rd(double __a);
__attribute__((device)) long long __nv_double2ll_rn(double __a);
__attribute__((device)) long long __nv_double2ll_ru(double __a);
__attribute__((device)) long long __nv_double2ll_rz(double __a);
__attribute__((device)) int __nv_double2loint(double __a);
__attribute__((device)) unsigned int __nv_double2uint_rd(double __a);
__attribute__((device)) unsigned int __nv_double2uint_rn(double __a);
__attribute__((device)) unsigned int __nv_double2uint_ru(double __a);
__attribute__((device)) unsigned int __nv_double2uint_rz(double __a);
__attribute__((device)) unsigned long long __nv_double2ull_rd(double __a);
__attribute__((device)) unsigned long long __nv_double2ull_rn(double __a);
__attribute__((device)) unsigned long long __nv_double2ull_ru(double __a);
__attribute__((device)) unsigned long long __nv_double2ull_rz(double __a);
__attribute__((device)) unsigned long long __nv_double_as_longlong(double __a);
__attribute__((device)) double __nv_drcp_rd(double __a);
__attribute__((device)) double __nv_drcp_rn(double __a);
__attribute__((device)) double __nv_drcp_ru(double __a);
__attribute__((device)) double __nv_drcp_rz(double __a);
__attribute__((device)) double __nv_dsqrt_rd(double __a);
__attribute__((device)) double __nv_dsqrt_rn(double __a);
__attribute__((device)) double __nv_dsqrt_ru(double __a);
__attribute__((device)) double __nv_dsqrt_rz(double __a);
__attribute__((device)) double __nv_dsub_rd(double __a, double __b);
__attribute__((device)) double __nv_dsub_rn(double __a, double __b);
__attribute__((device)) double __nv_dsub_ru(double __a, double __b);
__attribute__((device)) double __nv_dsub_rz(double __a, double __b);
__attribute__((device)) double __nv_erfc(double __a);
__attribute__((device)) float __nv_erfcf(float __a);
__attribute__((device)) double __nv_erfcinv(double __a);
__attribute__((device)) float __nv_erfcinvf(float __a);
__attribute__((device)) double __nv_erfcx(double __a);
__attribute__((device)) float __nv_erfcxf(float __a);
__attribute__((device)) double __nv_erf(double __a);
__attribute__((device)) float __nv_erff(float __a);
__attribute__((device)) double __nv_erfinv(double __a);
__attribute__((device)) float __nv_erfinvf(float __a);
__attribute__((device)) double __nv_exp10(double __a);
__attribute__((device)) float __nv_exp10f(float __a);
__attribute__((device)) double __nv_exp2(double __a);
__attribute__((device)) float __nv_exp2f(float __a);
__attribute__((device)) double __nv_exp(double __a);
__attribute__((device)) float __nv_expf(float __a);
__attribute__((device)) double __nv_expm1(double __a);
__attribute__((device)) float __nv_expm1f(float __a);
__attribute__((device)) double __nv_fabs(double __a);
__attribute__((device)) float __nv_fabsf(float __a);
__attribute__((device)) float __nv_fadd_rd(float __a, float __b);
__attribute__((device)) float __nv_fadd_rn(float __a, float __b);
__attribute__((device)) float __nv_fadd_ru(float __a, float __b);
__attribute__((device)) float __nv_fadd_rz(float __a, float __b);
__attribute__((device)) float __nv_fast_cosf(float __a);
__attribute__((device)) float __nv_fast_exp10f(float __a);
__attribute__((device)) float __nv_fast_expf(float __a);
__attribute__((device)) float __nv_fast_fdividef(float __a, float __b);
__attribute__((device)) float __nv_fast_log10f(float __a);
__attribute__((device)) float __nv_fast_log2f(float __a);
__attribute__((device)) float __nv_fast_logf(float __a);
__attribute__((device)) float __nv_fast_powf(float __a, float __b);
__attribute__((device)) void __nv_fast_sincosf(float __a, float *__s, float *__c);
__attribute__((device)) float __nv_fast_sinf(float __a);
__attribute__((device)) float __nv_fast_tanf(float __a);
__attribute__((device)) double __nv_fdim(double __a, double __b);
__attribute__((device)) float __nv_fdimf(float __a, float __b);
__attribute__((device)) float __nv_fdiv_rd(float __a, float __b);
__attribute__((device)) float __nv_fdiv_rn(float __a, float __b);
__attribute__((device)) float __nv_fdiv_ru(float __a, float __b);
__attribute__((device)) float __nv_fdiv_rz(float __a, float __b);
__attribute__((device)) int __nv_ffs(int __a);
__attribute__((device)) int __nv_ffsll(long long __a);
__attribute__((device)) int __nv_finitef(float __a);
__attribute__((device)) unsigned short __nv_float2half_rn(float __a);
__attribute__((device)) int __nv_float2int_rd(float __a);
__attribute__((device)) int __nv_float2int_rn(float __a);
__attribute__((device)) int __nv_float2int_ru(float __a);
__attribute__((device)) int __nv_float2int_rz(float __a);
__attribute__((device)) long long __nv_float2ll_rd(float __a);
__attribute__((device)) long long __nv_float2ll_rn(float __a);
__attribute__((device)) long long __nv_float2ll_ru(float __a);
__attribute__((device)) long long __nv_float2ll_rz(float __a);
__attribute__((device)) unsigned int __nv_float2uint_rd(float __a);
__attribute__((device)) unsigned int __nv_float2uint_rn(float __a);
__attribute__((device)) unsigned int __nv_float2uint_ru(float __a);
__attribute__((device)) unsigned int __nv_float2uint_rz(float __a);
__attribute__((device)) unsigned long long __nv_float2ull_rd(float __a);
__attribute__((device)) unsigned long long __nv_float2ull_rn(float __a);
__attribute__((device)) unsigned long long __nv_float2ull_ru(float __a);
__attribute__((device)) unsigned long long __nv_float2ull_rz(float __a);
__attribute__((device)) int __nv_float_as_int(float __a);
__attribute__((device)) unsigned int __nv_float_as_uint(float __a);
__attribute__((device)) double __nv_floor(double __a);
__attribute__((device)) float __nv_floorf(float __a);
__attribute__((device)) double __nv_fma(double __a, double __b, double __c);
__attribute__((device)) float __nv_fmaf(float __a, float __b, float __c);
__attribute__((device)) float __nv_fmaf_ieee_rd(float __a, float __b, float __c);
__attribute__((device)) float __nv_fmaf_ieee_rn(float __a, float __b, float __c);
__attribute__((device)) float __nv_fmaf_ieee_ru(float __a, float __b, float __c);
__attribute__((device)) float __nv_fmaf_ieee_rz(float __a, float __b, float __c);
__attribute__((device)) float __nv_fmaf_rd(float __a, float __b, float __c);
__attribute__((device)) float __nv_fmaf_rn(float __a, float __b, float __c);
__attribute__((device)) float __nv_fmaf_ru(float __a, float __b, float __c);
__attribute__((device)) float __nv_fmaf_rz(float __a, float __b, float __c);
__attribute__((device)) double __nv_fma_rd(double __a, double __b, double __c);
__attribute__((device)) double __nv_fma_rn(double __a, double __b, double __c);
__attribute__((device)) double __nv_fma_ru(double __a, double __b, double __c);
__attribute__((device)) double __nv_fma_rz(double __a, double __b, double __c);
__attribute__((device)) double __nv_fmax(double __a, double __b);
__attribute__((device)) float __nv_fmaxf(float __a, float __b);
__attribute__((device)) double __nv_fmin(double __a, double __b);
__attribute__((device)) float __nv_fminf(float __a, float __b);
__attribute__((device)) double __nv_fmod(double __a, double __b);
__attribute__((device)) float __nv_fmodf(float __a, float __b);
__attribute__((device)) float __nv_fmul_rd(float __a, float __b);
__attribute__((device)) float __nv_fmul_rn(float __a, float __b);
__attribute__((device)) float __nv_fmul_ru(float __a, float __b);
__attribute__((device)) float __nv_fmul_rz(float __a, float __b);
__attribute__((device)) float __nv_frcp_rd(float __a);
__attribute__((device)) float __nv_frcp_rn(float __a);
__attribute__((device)) float __nv_frcp_ru(float __a);
__attribute__((device)) float __nv_frcp_rz(float __a);
__attribute__((device)) double __nv_frexp(double __a, int *__b);
__attribute__((device)) float __nv_frexpf(float __a, int *__b);
__attribute__((device)) float __nv_frsqrt_rn(float __a);
__attribute__((device)) float __nv_fsqrt_rd(float __a);
__attribute__((device)) float __nv_fsqrt_rn(float __a);
__attribute__((device)) float __nv_fsqrt_ru(float __a);
__attribute__((device)) float __nv_fsqrt_rz(float __a);
__attribute__((device)) float __nv_fsub_rd(float __a, float __b);
__attribute__((device)) float __nv_fsub_rn(float __a, float __b);
__attribute__((device)) float __nv_fsub_ru(float __a, float __b);
__attribute__((device)) float __nv_fsub_rz(float __a, float __b);
__attribute__((device)) int __nv_hadd(int __a, int __b);
__attribute__((device)) float __nv_half2float(unsigned short __h);
__attribute__((device)) double __nv_hiloint2double(int __a, int __b);
__attribute__((device)) double __nv_hypot(double __a, double __b);
__attribute__((device)) float __nv_hypotf(float __a, float __b);
__attribute__((device)) int __nv_ilogb(double __a);
__attribute__((device)) int __nv_ilogbf(float __a);
__attribute__((device)) double __nv_int2double_rn(int __a);
__attribute__((device)) float __nv_int2float_rd(int __a);
__attribute__((device)) float __nv_int2float_rn(int __a);
__attribute__((device)) float __nv_int2float_ru(int __a);
__attribute__((device)) float __nv_int2float_rz(int __a);
__attribute__((device)) float __nv_int_as_float(int __a);
__attribute__((device)) int __nv_isfinited(double __a);
__attribute__((device)) int __nv_isinfd(double __a);
__attribute__((device)) int __nv_isinff(float __a);
__attribute__((device)) int __nv_isnand(double __a);
__attribute__((device)) int __nv_isnanf(float __a);
__attribute__((device)) double __nv_j0(double __a);
__attribute__((device)) float __nv_j0f(float __a);
__attribute__((device)) double __nv_j1(double __a);
__attribute__((device)) float __nv_j1f(float __a);
__attribute__((device)) float __nv_jnf(int __a, float __b);
__attribute__((device)) double __nv_jn(int __a, double __b);
__attribute__((device)) double __nv_ldexp(double __a, int __b);
__attribute__((device)) float __nv_ldexpf(float __a, int __b);
__attribute__((device)) double __nv_lgamma(double __a);
__attribute__((device)) float __nv_lgammaf(float __a);
__attribute__((device)) double __nv_ll2double_rd(long long __a);
__attribute__((device)) double __nv_ll2double_rn(long long __a);
__attribute__((device)) double __nv_ll2double_ru(long long __a);
__attribute__((device)) double __nv_ll2double_rz(long long __a);
__attribute__((device)) float __nv_ll2float_rd(long long __a);
__attribute__((device)) float __nv_ll2float_rn(long long __a);
__attribute__((device)) float __nv_ll2float_ru(long long __a);
__attribute__((device)) float __nv_ll2float_rz(long long __a);
__attribute__((device)) long long __nv_llabs(long long __a);
__attribute__((device)) long long __nv_llmax(long long __a, long long __b);
__attribute__((device)) long long __nv_llmin(long long __a, long long __b);
__attribute__((device)) long long __nv_llrint(double __a);
__attribute__((device)) long long __nv_llrintf(float __a);
__attribute__((device)) long long __nv_llround(double __a);
__attribute__((device)) long long __nv_llroundf(float __a);
__attribute__((device)) double __nv_log10(double __a);
__attribute__((device)) float __nv_log10f(float __a);
__attribute__((device)) double __nv_log1p(double __a);
__attribute__((device)) float __nv_log1pf(float __a);
__attribute__((device)) double __nv_log2(double __a);
__attribute__((device)) float __nv_log2f(float __a);
__attribute__((device)) double __nv_logb(double __a);
__attribute__((device)) float __nv_logbf(float __a);
__attribute__((device)) double __nv_log(double __a);
__attribute__((device)) float __nv_logf(float __a);
__attribute__((device)) double __nv_longlong_as_double(long long __a);
__attribute__((device)) int __nv_max(int __a, int __b);
__attribute__((device)) int __nv_min(int __a, int __b);
__attribute__((device)) double __nv_modf(double __a, double *__b);
__attribute__((device)) float __nv_modff(float __a, float *__b);
__attribute__((device)) int __nv_mul24(int __a, int __b);
__attribute__((device)) long long __nv_mul64hi(long long __a, long long __b);
__attribute__((device)) int __nv_mulhi(int __a, int __b);
__attribute__((device)) double __nv_nan(const signed char *__a);
__attribute__((device)) float __nv_nanf(const signed char *__a);
__attribute__((device)) double __nv_nearbyint(double __a);
__attribute__((device)) float __nv_nearbyintf(float __a);
__attribute__((device)) double __nv_nextafter(double __a, double __b);
__attribute__((device)) float __nv_nextafterf(float __a, float __b);
__attribute__((device)) double __nv_norm3d(double __a, double __b, double __c);
__attribute__((device)) float __nv_norm3df(float __a, float __b, float __c);
__attribute__((device)) double __nv_norm4d(double __a, double __b, double __c, double __d);
__attribute__((device)) float __nv_norm4df(float __a, float __b, float __c, float __d);
__attribute__((device)) double __nv_normcdf(double __a);
__attribute__((device)) float __nv_normcdff(float __a);
__attribute__((device)) double __nv_normcdfinv(double __a);
__attribute__((device)) float __nv_normcdfinvf(float __a);
__attribute__((device)) float __nv_normf(int __a, const float *__b);
__attribute__((device)) double __nv_norm(int __a, const double *__b);
__attribute__((device)) int __nv_popc(int __a);
__attribute__((device)) int __nv_popcll(long long __a);
__attribute__((device)) double __nv_pow(double __a, double __b);
__attribute__((device)) float __nv_powf(float __a, float __b);
__attribute__((device)) double __nv_powi(double __a, int __b);
__attribute__((device)) float __nv_powif(float __a, int __b);
__attribute__((device)) double __nv_rcbrt(double __a);
__attribute__((device)) float __nv_rcbrtf(float __a);
__attribute__((device)) double __nv_rcp64h(double __a);
__attribute__((device)) double __nv_remainder(double __a, double __b);
__attribute__((device)) float __nv_remainderf(float __a, float __b);
__attribute__((device)) double __nv_remquo(double __a, double __b, int *__c);
__attribute__((device)) float __nv_remquof(float __a, float __b, int *__c);
__attribute__((device)) int __nv_rhadd(int __a, int __b);
__attribute__((device)) double __nv_rhypot(double __a, double __b);
__attribute__((device)) float __nv_rhypotf(float __a, float __b);
__attribute__((device)) double __nv_rint(double __a);
__attribute__((device)) float __nv_rintf(float __a);
__attribute__((device)) double __nv_rnorm3d(double __a, double __b, double __c);
__attribute__((device)) float __nv_rnorm3df(float __a, float __b, float __c);
__attribute__((device)) double __nv_rnorm4d(double __a, double __b, double __c, double __d);
__attribute__((device)) float __nv_rnorm4df(float __a, float __b, float __c, float __d);
__attribute__((device)) float __nv_rnormf(int __a, const float *__b);
__attribute__((device)) double __nv_rnorm(int __a, const double *__b);
__attribute__((device)) double __nv_round(double __a);
__attribute__((device)) float __nv_roundf(float __a);
__attribute__((device)) double __nv_rsqrt(double __a);
__attribute__((device)) float __nv_rsqrtf(float __a);
__attribute__((device)) int __nv_sad(int __a, int __b, int __c);
__attribute__((device)) float __nv_saturatef(float __a);
__attribute__((device)) double __nv_scalbn(double __a, int __b);
__attribute__((device)) float __nv_scalbnf(float __a, int __b);
__attribute__((device)) int __nv_signbitd(double __a);
__attribute__((device)) int __nv_signbitf(float __a);
__attribute__((device)) void __nv_sincos(double __a, double *__b, double *__c);
__attribute__((device)) void __nv_sincosf(float __a, float *__b, float *__c);
__attribute__((device)) void __nv_sincospi(double __a, double *__b, double *__c);
__attribute__((device)) void __nv_sincospif(float __a, float *__b, float *__c);
__attribute__((device)) double __nv_sin(double __a);
__attribute__((device)) float __nv_sinf(float __a);
__attribute__((device)) double __nv_sinh(double __a);
__attribute__((device)) float __nv_sinhf(float __a);
__attribute__((device)) double __nv_sinpi(double __a);
__attribute__((device)) float __nv_sinpif(float __a);
__attribute__((device)) double __nv_sqrt(double __a);
__attribute__((device)) float __nv_sqrtf(float __a);
__attribute__((device)) double __nv_tan(double __a);
__attribute__((device)) float __nv_tanf(float __a);
__attribute__((device)) double __nv_tanh(double __a);
__attribute__((device)) float __nv_tanhf(float __a);
__attribute__((device)) double __nv_tgamma(double __a);
__attribute__((device)) float __nv_tgammaf(float __a);
__attribute__((device)) double __nv_trunc(double __a);
__attribute__((device)) float __nv_truncf(float __a);
__attribute__((device)) int __nv_uhadd(unsigned int __a, unsigned int __b);
__attribute__((device)) double __nv_uint2double_rn(unsigned int __i);
__attribute__((device)) float __nv_uint2float_rd(unsigned int __a);
__attribute__((device)) float __nv_uint2float_rn(unsigned int __a);
__attribute__((device)) float __nv_uint2float_ru(unsigned int __a);
__attribute__((device)) float __nv_uint2float_rz(unsigned int __a);
__attribute__((device)) float __nv_uint_as_float(unsigned int __a);
__attribute__((device)) double __nv_ull2double_rd(unsigned long long __a);
__attribute__((device)) double __nv_ull2double_rn(unsigned long long __a);
__attribute__((device)) double __nv_ull2double_ru(unsigned long long __a);
__attribute__((device)) double __nv_ull2double_rz(unsigned long long __a);
__attribute__((device)) float __nv_ull2float_rd(unsigned long long __a);
__attribute__((device)) float __nv_ull2float_rn(unsigned long long __a);
__attribute__((device)) float __nv_ull2float_ru(unsigned long long __a);
__attribute__((device)) float __nv_ull2float_rz(unsigned long long __a);
__attribute__((device)) unsigned long long __nv_ullmax(unsigned long long __a,
                                          unsigned long long __b);
__attribute__((device)) unsigned long long __nv_ullmin(unsigned long long __a,
                                          unsigned long long __b);
__attribute__((device)) unsigned int __nv_umax(unsigned int __a, unsigned int __b);
__attribute__((device)) unsigned int __nv_umin(unsigned int __a, unsigned int __b);
__attribute__((device)) unsigned int __nv_umul24(unsigned int __a, unsigned int __b);
__attribute__((device)) unsigned long long __nv_umul64hi(unsigned long long __a,
                                            unsigned long long __b);
__attribute__((device)) unsigned int __nv_umulhi(unsigned int __a, unsigned int __b);
__attribute__((device)) unsigned int __nv_urhadd(unsigned int __a, unsigned int __b);
__attribute__((device)) unsigned int __nv_usad(unsigned int __a, unsigned int __b,
                                  unsigned int __c);
# 454 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_libdevice_declares.h" 3
__attribute__((device)) double __nv_y0(double __a);
__attribute__((device)) float __nv_y0f(float __a);
__attribute__((device)) double __nv_y1(double __a);
__attribute__((device)) float __nv_y1f(float __a);
__attribute__((device)) float __nv_ynf(int __a, float __b);
__attribute__((device)) double __nv_yn(int __a, double __b);


}
# 152 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3


# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_device_functions.h" 1 3
# 29 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_device_functions.h" 3
// Forwards __a to the __nvvm_vote_all builtin and returns its result as int.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int __all(int __a) {
  const int __vote = __nvvm_vote_all(__a);
  return __vote;
}
// Forwards __a to the __nvvm_vote_any builtin and returns its result as int.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int __any(int __a) {
  const int __vote = __nvvm_vote_any(__a);
  return __vote;
}
// Forwards __a to the __nvvm_vote_ballot builtin and returns its result as
// unsigned int.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int __ballot(int __a) {
  const unsigned int __ballot_bits = __nvvm_vote_ballot(__a);
  return __ballot_bits;
}
// Calls libdevice's __nv_brev (declared on int) and returns the result
// converted to unsigned int.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int __brev(unsigned int __a) {
  const unsigned int __result = __nv_brev(__a);
  return __result;
}
// Calls libdevice's __nv_brevll (declared on long long) and returns the
// result converted to unsigned long long.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long __brevll(unsigned long long __a) {
  const unsigned long long __result = __nv_brevll(__a);
  return __result;
}

// Emits the inline assembly statement "brkpt;". Declared volatile so the
// compiler does not drop it.
static __attribute__((device)) __inline__ __attribute__((always_inline)) void __brkpt() {
  __asm__ __volatile__("brkpt;");
}
// One-argument overload: the argument is ignored and the call is routed to
// the zero-argument __brkpt().
static __attribute__((device)) __inline__ __attribute__((always_inline)) void __brkpt(int __a) {
  (void)__a;  // intentionally unused
  __brkpt();
}


// Passes all three operands to libdevice's __nv_byte_perm (declared on int)
// and returns the result converted to unsigned int.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int __byte_perm(
    unsigned int __a,
    unsigned int __b,
    unsigned int __c) {
  const unsigned int __result = __nv_byte_perm(__a, __b, __c);
  return __result;
}
// Thin wrapper over libdevice's __nv_clz.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int __clz(int __a) {
  const int __result = __nv_clz(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_clzll; takes long long, returns int.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int __clzll(long long __a) {
  const int __result = __nv_clzll(__a);
  return __result;
}
// Maps __cosf onto libdevice's __nv_fast_cosf (not __nv_cosf).
static __attribute__((device)) __inline__ __attribute__((always_inline)) float __cosf(float __a) {
  const float __result = __nv_fast_cosf(__a);
  return __result;
}
// Forwards (__p, __v) to the __nvvm_atom_add_gen_d builtin and returns
// whatever that builtin yields.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dAtomicAdd(double *__p, double __v) {
  const double __result = __nvvm_atom_add_gen_d(__p, __v);
  return __result;
}
// Forwards (__p, __v) to the __nvvm_atom_cta_add_gen_d builtin and returns
// whatever that builtin yields.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dAtomicAdd_block(double *__p, double __v) {
  const double __result = __nvvm_atom_cta_add_gen_d(__p, __v);
  return __result;
}
// Forwards (__p, __v) to the __nvvm_atom_sys_add_gen_d builtin and returns
// whatever that builtin yields.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dAtomicAdd_system(double *__p, double __v) {
  const double __result = __nvvm_atom_sys_add_gen_d(__p, __v);
  return __result;
}
// Thin wrapper: hands both operands to libdevice's __nv_dadd_rd.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dadd_rd(double __a, double __b) {
  const double __result = __nv_dadd_rd(__a, __b);
  return __result;
}
// Thin wrapper: hands both operands to libdevice's __nv_dadd_rn.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dadd_rn(double __a, double __b) {
  const double __result = __nv_dadd_rn(__a, __b);
  return __result;
}
// Thin wrapper: hands both operands to libdevice's __nv_dadd_ru.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dadd_ru(double __a, double __b) {
  const double __result = __nv_dadd_ru(__a, __b);
  return __result;
}
// Thin wrapper: hands both operands to libdevice's __nv_dadd_rz.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dadd_rz(double __a, double __b) {
  const double __result = __nv_dadd_rz(__a, __b);
  return __result;
}
// Thin wrapper: hands (__a, __b) to libdevice's __nv_ddiv_rd in that order.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __ddiv_rd(double __a, double __b) {
  const double __result = __nv_ddiv_rd(__a, __b);
  return __result;
}
// Thin wrapper: hands (__a, __b) to libdevice's __nv_ddiv_rn in that order.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __ddiv_rn(double __a, double __b) {
  const double __result = __nv_ddiv_rn(__a, __b);
  return __result;
}
// Thin wrapper: hands (__a, __b) to libdevice's __nv_ddiv_ru in that order.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __ddiv_ru(double __a, double __b) {
  const double __result = __nv_ddiv_ru(__a, __b);
  return __result;
}
// Thin wrapper: hands (__a, __b) to libdevice's __nv_ddiv_rz in that order.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __ddiv_rz(double __a, double __b) {
  const double __result = __nv_ddiv_rz(__a, __b);
  return __result;
}
// Thin wrapper: hands both operands to libdevice's __nv_dmul_rd.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dmul_rd(double __a, double __b) {
  const double __result = __nv_dmul_rd(__a, __b);
  return __result;
}
// Thin wrapper: hands both operands to libdevice's __nv_dmul_rn.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dmul_rn(double __a, double __b) {
  const double __result = __nv_dmul_rn(__a, __b);
  return __result;
}
// Thin wrapper: hands both operands to libdevice's __nv_dmul_ru.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dmul_ru(double __a, double __b) {
  const double __result = __nv_dmul_ru(__a, __b);
  return __result;
}
// Thin wrapper: hands both operands to libdevice's __nv_dmul_rz.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dmul_rz(double __a, double __b) {
  const double __result = __nv_dmul_rz(__a, __b);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2float_rd (double in, float out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) float __double2float_rd(double __a) {
  const float __result = __nv_double2float_rd(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2float_rn (double in, float out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) float __double2float_rn(double __a) {
  const float __result = __nv_double2float_rn(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2float_ru (double in, float out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) float __double2float_ru(double __a) {
  const float __result = __nv_double2float_ru(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2float_rz (double in, float out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) float __double2float_rz(double __a) {
  const float __result = __nv_double2float_rz(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2hiint (double in, int out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) int __double2hiint(double __a) {
  const int __result = __nv_double2hiint(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2int_rd (double in, int out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) int __double2int_rd(double __a) {
  const int __result = __nv_double2int_rd(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2int_rn (double in, int out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) int __double2int_rn(double __a) {
  const int __result = __nv_double2int_rn(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2int_ru (double in, int out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) int __double2int_ru(double __a) {
  const int __result = __nv_double2int_ru(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2int_rz (double in, int out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) int __double2int_rz(double __a) {
  const int __result = __nv_double2int_rz(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2ll_rd (double in, long long out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long __double2ll_rd(double __a) {
  const long long __result = __nv_double2ll_rd(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2ll_rn (double in, long long out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long __double2ll_rn(double __a) {
  const long long __result = __nv_double2ll_rn(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2ll_ru (double in, long long out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long __double2ll_ru(double __a) {
  const long long __result = __nv_double2ll_ru(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2ll_rz (double in, long long out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long __double2ll_rz(double __a) {
  const long long __result = __nv_double2ll_rz(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2loint (double in, int out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) int __double2loint(double __a) {
  const int __result = __nv_double2loint(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2uint_rd (double in, unsigned out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int __double2uint_rd(double __a) {
  const unsigned int __result = __nv_double2uint_rd(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2uint_rn (double in, unsigned out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int __double2uint_rn(double __a) {
  const unsigned int __result = __nv_double2uint_rn(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2uint_ru (double in, unsigned out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int __double2uint_ru(double __a) {
  const unsigned int __result = __nv_double2uint_ru(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2uint_rz (double in, unsigned out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int __double2uint_rz(double __a) {
  const unsigned int __result = __nv_double2uint_rz(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2ull_rd
// (double in, unsigned long long out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long __double2ull_rd(double __a) {
  const unsigned long long __result = __nv_double2ull_rd(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2ull_rn
// (double in, unsigned long long out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long __double2ull_rn(double __a) {
  const unsigned long long __result = __nv_double2ull_rn(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2ull_ru
// (double in, unsigned long long out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long __double2ull_ru(double __a) {
  const unsigned long long __result = __nv_double2ull_ru(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double2ull_rz
// (double in, unsigned long long out).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long __double2ull_rz(double __a) {
  const unsigned long long __result = __nv_double2ull_rz(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_double_as_longlong. The callee's result
// is implicitly converted to this wrapper's signed long long return type.
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long __double_as_longlong(double __a) {
  const long long __bits = __nv_double_as_longlong(__a);
  return __bits;
}
// Thin wrapper over libdevice's __nv_drcp_rd.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __drcp_rd(double __a) {
  const double __result = __nv_drcp_rd(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_drcp_rn.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __drcp_rn(double __a) {
  const double __result = __nv_drcp_rn(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_drcp_ru.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __drcp_ru(double __a) {
  const double __result = __nv_drcp_ru(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_drcp_rz.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __drcp_rz(double __a) {
  const double __result = __nv_drcp_rz(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_dsqrt_rd.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dsqrt_rd(double __a) {
  const double __result = __nv_dsqrt_rd(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_dsqrt_rn.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dsqrt_rn(double __a) {
  const double __result = __nv_dsqrt_rn(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_dsqrt_ru.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dsqrt_ru(double __a) {
  const double __result = __nv_dsqrt_ru(__a);
  return __result;
}
// Thin wrapper over libdevice's __nv_dsqrt_rz.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dsqrt_rz(double __a) {
  const double __result = __nv_dsqrt_rz(__a);
  return __result;
}
// Thin wrapper: hands (__a, __b) to libdevice's __nv_dsub_rd in that order.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dsub_rd(double __a, double __b) {
  const double __result = __nv_dsub_rd(__a, __b);
  return __result;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dsub_rn(double __a, double __b) {
  return __nv_dsub_rn(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dsub_ru(double __a, double __b) {
  return __nv_dsub_ru(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double __dsub_rz(double __a, double __b) {
  return __nv_dsub_rz(__a, __b);
}
// __exp10f / __expf: forward to the __nv_fast_* counterparts.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__exp10f(float __a) {
  return __nv_fast_exp10f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__expf(float __a) {
  return __nv_fast_expf(__a);
}
// Float atomic add wrappers. The plain, _block and _system variants call the
// unqualified, "cta" and "sys" forms of __nvvm_atom_*_add_gen_f respectively
// and return the builtin's result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fAtomicAdd(float *__p, float __v) {
  const float __ret = __nvvm_atom_add_gen_f(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fAtomicAdd_block(float *__p, float __v) {
  const float __ret = __nvvm_atom_cta_add_gen_f(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fAtomicAdd_system(float *__p, float __v) {
  const float __ret = __nvvm_atom_sys_add_gen_f(__p, __v);
  return __ret;
}
// Float atomic exchange wrappers built on the int exchange builtins: the
// pointer is cast to int*, the value goes through __nv_float_as_int, and the
// int returned by the builtin is passed through __nv_int_as_float.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fAtomicExch(float *__p, float __v) {
  int *const __slot = (int *)__p;
  const int __prev = __nvvm_atom_xchg_gen_i(__slot, __nv_float_as_int(__v));
  return __nv_int_as_float(__prev);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fAtomicExch_block(float *__p, float __v) {
  int *const __slot = (int *)__p;
  const int __prev = __nvvm_atom_cta_xchg_gen_i(__slot, __nv_float_as_int(__v));
  return __nv_int_as_float(__prev);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fAtomicExch_system(float *__p, float __v) {
  int *const __slot = (int *)__p;
  const int __prev = __nvvm_atom_sys_xchg_gen_i(__slot, __nv_float_as_int(__v));
  return __nv_int_as_float(__prev);
}
// __fadd_{rd,rn,ru,rz}: pass (__a, __b) to the matching __nv_fadd_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fadd_rd(float __a, float __b) {
  const float __ret = __nv_fadd_rd(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fadd_rn(float __a, float __b) {
  const float __ret = __nv_fadd_rn(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fadd_ru(float __a, float __b) {
  const float __ret = __nv_fadd_ru(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fadd_rz(float __a, float __b) {
  const float __ret = __nv_fadd_rz(__a, __b);
  return __ret;
}
// __fdiv_{rd,rn,ru,rz}: pass (__a, __b) to the matching __nv_fdiv_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fdiv_rd(float __a, float __b) {
  const float __ret = __nv_fdiv_rd(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fdiv_rn(float __a, float __b) {
  const float __ret = __nv_fdiv_rn(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fdiv_ru(float __a, float __b) {
  const float __ret = __nv_fdiv_ru(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fdiv_rz(float __a, float __b) {
  const float __ret = __nv_fdiv_rz(__a, __b);
  return __ret;
}
// Forwards (__a, __b) to __nv_fast_fdividef and returns its result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fdividef(float __a, float __b) {
  const float __ret = __nv_fast_fdividef(__a, __b);
  return __ret;
}
// __ffs / __ffsll: forward to __nv_ffs / __nv_ffsll.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__ffs(int __a) {
  return __nv_ffs(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__ffsll(long long __a) {
  return __nv_ffsll(__a);
}
// __finite (double) forwards to __nv_isfinited; __finitef (float) forwards to
// __nv_finitef.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__finite(double __a) {
  return __nv_isfinited(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__finitef(float __a) {
  return __nv_finitef(__a);
}


// __float2int_{rd,rn,ru,rz}: forward to the matching __nv_float2int_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__float2int_rd(float __a) {
  return __nv_float2int_rd(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__float2int_rn(float __a) {
  return __nv_float2int_rn(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__float2int_ru(float __a) {
  return __nv_float2int_ru(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__float2int_rz(float __a) {
  return __nv_float2int_rz(__a);
}
// __float2ll_{rd,rn,ru,rz}: forward to the matching __nv_float2ll_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__float2ll_rd(float __a) {
  return __nv_float2ll_rd(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__float2ll_rn(float __a) {
  return __nv_float2ll_rn(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__float2ll_ru(float __a) {
  return __nv_float2ll_ru(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__float2ll_rz(float __a) {
  return __nv_float2ll_rz(__a);
}
// __float2uint_{rd,rn,ru,rz}: forward to the matching __nv_float2uint_*
// routine and return its result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__float2uint_rd(float __a) {
  const unsigned int __ret = __nv_float2uint_rd(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__float2uint_rn(float __a) {
  const unsigned int __ret = __nv_float2uint_rn(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__float2uint_ru(float __a) {
  const unsigned int __ret = __nv_float2uint_ru(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__float2uint_rz(float __a) {
  const unsigned int __ret = __nv_float2uint_rz(__a);
  return __ret;
}
// __float2ull_{rd,rn,ru,rz}: forward to the matching __nv_float2ull_* routine
// and return its result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__float2ull_rd(float __a) {
  const unsigned long long __ret = __nv_float2ull_rd(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__float2ull_rn(float __a) {
  const unsigned long long __ret = __nv_float2ull_rn(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__float2ull_ru(float __a) {
  const unsigned long long __ret = __nv_float2ull_ru(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__float2ull_rz(float __a) {
  const unsigned long long __ret = __nv_float2ull_rz(__a);
  return __ret;
}
// __float_as_int / __float_as_uint: forward to __nv_float_as_int /
// __nv_float_as_uint.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__float_as_int(float __a) {
  return __nv_float_as_int(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__float_as_uint(float __a) {
  const unsigned int __ret = __nv_float_as_uint(__a);
  return __ret;
}
// __fma_{rd,rn,ru,rz}: pass (__a, __b, __c) to the matching __nv_fma_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__fma_rd(double __a, double __b, double __c) {
  const double __ret = __nv_fma_rd(__a, __b, __c);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__fma_rn(double __a, double __b, double __c) {
  const double __ret = __nv_fma_rn(__a, __b, __c);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__fma_ru(double __a, double __b, double __c) {
  const double __ret = __nv_fma_ru(__a, __b, __c);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__fma_rz(double __a, double __b, double __c) {
  const double __ret = __nv_fma_rz(__a, __b, __c);
  return __ret;
}
// __fmaf_ieee_{rd,rn,ru,rz}: pass (__a, __b, __c) to the matching
// __nv_fmaf_ieee_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmaf_ieee_rd(float __a, float __b, float __c) {
  const float __ret = __nv_fmaf_ieee_rd(__a, __b, __c);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmaf_ieee_rn(float __a, float __b, float __c) {
  const float __ret = __nv_fmaf_ieee_rn(__a, __b, __c);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmaf_ieee_ru(float __a, float __b, float __c) {
  const float __ret = __nv_fmaf_ieee_ru(__a, __b, __c);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmaf_ieee_rz(float __a, float __b, float __c) {
  const float __ret = __nv_fmaf_ieee_rz(__a, __b, __c);
  return __ret;
}
// __fmaf_{rd,rn,ru,rz}: pass (__a, __b, __c) to the matching __nv_fmaf_*
// routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmaf_rd(float __a, float __b, float __c) {
  const float __ret = __nv_fmaf_rd(__a, __b, __c);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmaf_rn(float __a, float __b, float __c) {
  const float __ret = __nv_fmaf_rn(__a, __b, __c);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmaf_ru(float __a, float __b, float __c) {
  const float __ret = __nv_fmaf_ru(__a, __b, __c);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmaf_rz(float __a, float __b, float __c) {
  const float __ret = __nv_fmaf_rz(__a, __b, __c);
  return __ret;
}
// __fmul_{rd,rn,ru,rz}: pass (__a, __b) to the matching __nv_fmul_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmul_rd(float __a, float __b) {
  const float __ret = __nv_fmul_rd(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmul_rn(float __a, float __b) {
  const float __ret = __nv_fmul_rn(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmul_ru(float __a, float __b) {
  const float __ret = __nv_fmul_ru(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fmul_rz(float __a, float __b) {
  const float __ret = __nv_fmul_rz(__a, __b);
  return __ret;
}
// __frcp_{rd,rn,ru,rz}: forward to the matching __nv_frcp_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__frcp_rd(float __a) {
  return __nv_frcp_rd(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__frcp_rn(float __a) {
  return __nv_frcp_rn(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__frcp_ru(float __a) {
  return __nv_frcp_ru(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__frcp_rz(float __a) {
  return __nv_frcp_rz(__a);
}
// Forwards to __nv_frsqrt_rn.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__frsqrt_rn(float __a) {
  return __nv_frsqrt_rn(__a);
}
// __fsqrt_{rd,rn,ru,rz}: forward to the matching __nv_fsqrt_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fsqrt_rd(float __a) {
  return __nv_fsqrt_rd(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fsqrt_rn(float __a) {
  return __nv_fsqrt_rn(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fsqrt_ru(float __a) {
  return __nv_fsqrt_ru(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fsqrt_rz(float __a) {
  return __nv_fsqrt_rz(__a);
}
// __fsub_{rd,rn,ru,rz}: pass (__a, __b) to the matching __nv_fsub_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fsub_rd(float __a, float __b) {
  const float __ret = __nv_fsub_rd(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fsub_rn(float __a, float __b) {
  const float __ret = __nv_fsub_rn(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fsub_ru(float __a, float __b) {
  const float __ret = __nv_fsub_ru(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__fsub_rz(float __a, float __b) {
  const float __ret = __nv_fsub_rz(__a, __b);
  return __ret;
}
// Forwards (__a, __b) to __nv_hadd.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__hadd(int __a, int __b) {
  return __nv_hadd(__a, __b);
}
// Forwards (__a, __b) to __nv_hiloint2double and returns the resulting double.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__hiloint2double(int __a, int __b) {
  const double __ret = __nv_hiloint2double(__a, __b);
  return __ret;
}
// int atomic add wrappers. The plain, _block and _system variants call the
// unqualified, "cta" and "sys" forms of the add builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicAdd(int *__p, int __v) {
  const int __ret = __nvvm_atom_add_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicAdd_block(int *__p, int __v) {
  const int __ret = __nvvm_atom_cta_add_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicAdd_system(int *__p, int __v) {
  const int __ret = __nvvm_atom_sys_add_gen_i(__p, __v);
  return __ret;
}
// int atomic AND wrappers. The plain, _block and _system variants call the
// unqualified, "cta" and "sys" forms of the and builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicAnd(int *__p, int __v) {
  const int __ret = __nvvm_atom_and_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicAnd_block(int *__p, int __v) {
  const int __ret = __nvvm_atom_cta_and_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicAnd_system(int *__p, int __v) {
  const int __ret = __nvvm_atom_sys_and_gen_i(__p, __v);
  return __ret;
}
// int compare-and-swap wrappers. Each passes (__p, __cmp, __v) to the
// unqualified, "cta" or "sys" form of the cas builtin and returns its result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicCAS(int *__p, int __cmp, int __v) {
  const int __ret = __nvvm_atom_cas_gen_i(__p, __cmp, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicCAS_block(int *__p, int __cmp, int __v) {
  const int __ret = __nvvm_atom_cta_cas_gen_i(__p, __cmp, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicCAS_system(int *__p, int __cmp, int __v) {
  const int __ret = __nvvm_atom_sys_cas_gen_i(__p, __cmp, __v);
  return __ret;
}
// int atomic exchange wrappers. The plain, _block and _system variants call
// the unqualified, "cta" and "sys" forms of the xchg builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicExch(int *__p, int __v) {
  const int __ret = __nvvm_atom_xchg_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicExch_block(int *__p, int __v) {
  const int __ret = __nvvm_atom_cta_xchg_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicExch_system(int *__p, int __v) {
  const int __ret = __nvvm_atom_sys_xchg_gen_i(__p, __v);
  return __ret;
}
// int atomic max wrappers. The plain, _block and _system variants call the
// unqualified, "cta" and "sys" forms of the max builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicMax(int *__p, int __v) {
  const int __ret = __nvvm_atom_max_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicMax_block(int *__p, int __v) {
  const int __ret = __nvvm_atom_cta_max_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicMax_system(int *__p, int __v) {
  const int __ret = __nvvm_atom_sys_max_gen_i(__p, __v);
  return __ret;
}
// int atomic min wrappers. The plain, _block and _system variants call the
// unqualified, "cta" and "sys" forms of the min builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicMin(int *__p, int __v) {
  const int __ret = __nvvm_atom_min_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicMin_block(int *__p, int __v) {
  const int __ret = __nvvm_atom_cta_min_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicMin_system(int *__p, int __v) {
  const int __ret = __nvvm_atom_sys_min_gen_i(__p, __v);
  return __ret;
}
// int atomic OR wrappers. The plain, _block and _system variants call the
// unqualified, "cta" and "sys" forms of the or builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicOr(int *__p, int __v) {
  const int __ret = __nvvm_atom_or_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicOr_block(int *__p, int __v) {
  const int __ret = __nvvm_atom_cta_or_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicOr_system(int *__p, int __v) {
  const int __ret = __nvvm_atom_sys_or_gen_i(__p, __v);
  return __ret;
}
// int atomic XOR wrappers. The plain, _block and _system variants call the
// unqualified, "cta" and "sys" forms of the xor builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicXor(int *__p, int __v) {
  const int __ret = __nvvm_atom_xor_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicXor_block(int *__p, int __v) {
  const int __ret = __nvvm_atom_cta_xor_gen_i(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__iAtomicXor_system(int *__p, int __v) {
  const int __ret = __nvvm_atom_sys_xor_gen_i(__p, __v);
  return __ret;
}
// long long atomic max wrappers. The plain, _block and _system variants call
// the unqualified, "cta" and "sys" forms of the ll max builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__illAtomicMax(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_max_gen_ll(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__illAtomicMax_block(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_cta_max_gen_ll(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__illAtomicMax_system(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_sys_max_gen_ll(__p, __v);
  return __ret;
}
// long long atomic min wrappers. The plain, _block and _system variants call
// the unqualified, "cta" and "sys" forms of the ll min builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__illAtomicMin(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_min_gen_ll(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__illAtomicMin_block(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_cta_min_gen_ll(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__illAtomicMin_system(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_sys_min_gen_ll(__p, __v);
  return __ret;
}
// Forwards to __nv_int2double_rn.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__int2double_rn(int __a) {
  return __nv_int2double_rn(__a);
}
// __int2float_{rd,rn,ru,rz}: forward to the matching __nv_int2float_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__int2float_rd(int __a) {
  return __nv_int2float_rd(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__int2float_rn(int __a) {
  return __nv_int2float_rn(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__int2float_ru(int __a) {
  return __nv_int2float_ru(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__int2float_rz(int __a) {
  return __nv_int2float_rz(__a);
}
// Forwards to __nv_int_as_float.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__int_as_float(int __a) {
  return __nv_int_as_float(__a);
}
// Classification wrappers: __isfinited -> __nv_isfinited,
// __isinf -> __nv_isinfd, __isinff -> __nv_isinff.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__isfinited(double __a) {
  return __nv_isfinited(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__isinf(double __a) {
  return __nv_isinfd(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__isinff(float __a) {
  return __nv_isinff(__a);
}


// __isnan (double) -> __nv_isnand; __isnanf (float) -> __nv_isnanf.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__isnan(double __a) {
  return __nv_isnand(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__isnanf(float __a) {
  return __nv_isnanf(__a);
}


// __ll2double_{rd,rn,ru,rz}: forward to the matching __nv_ll2double_* routine
// and return its result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__ll2double_rd(long long __a) {
  const double __ret = __nv_ll2double_rd(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__ll2double_rn(long long __a) {
  const double __ret = __nv_ll2double_rn(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__ll2double_ru(long long __a) {
  const double __ret = __nv_ll2double_ru(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__ll2double_rz(long long __a) {
  const double __ret = __nv_ll2double_rz(__a);
  return __ret;
}
// __ll2float_{rd,rn,ru,rz}: forward to the matching __nv_ll2float_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__ll2float_rd(long long __a) {
  return __nv_ll2float_rd(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__ll2float_rn(long long __a) {
  return __nv_ll2float_rn(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__ll2float_ru(long long __a) {
  return __nv_ll2float_ru(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__ll2float_rz(long long __a) {
  return __nv_ll2float_rz(__a);
}
// long long atomic AND wrappers. The plain, _block and _system variants call
// the unqualified, "cta" and "sys" forms of the ll and builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__llAtomicAnd(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_and_gen_ll(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__llAtomicAnd_block(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_cta_and_gen_ll(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__llAtomicAnd_system(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_sys_and_gen_ll(__p, __v);
  return __ret;
}
// long long atomic OR wrappers. The plain, _block and _system variants call
// the unqualified, "cta" and "sys" forms of the ll or builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__llAtomicOr(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_or_gen_ll(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__llAtomicOr_block(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_cta_or_gen_ll(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__llAtomicOr_system(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_sys_or_gen_ll(__p, __v);
  return __ret;
}
// long long atomic XOR wrappers. The plain, _block and _system variants call
// the unqualified, "cta" and "sys" forms of the ll xor builtin respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__llAtomicXor(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_xor_gen_ll(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__llAtomicXor_block(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_cta_xor_gen_ll(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__llAtomicXor_system(long long *__p, long long __v) {
  const long long __ret = __nvvm_atom_sys_xor_gen_ll(__p, __v);
  return __ret;
}
// __log10f / __log2f / __logf: forward to the __nv_fast_* counterparts.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__log10f(float __a) {
  return __nv_fast_log10f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__log2f(float __a) {
  return __nv_fast_log2f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__logf(float __a) {
  return __nv_fast_logf(__a);
}
// Returns whatever __nv_longlong_as_double produces for __a.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__longlong_as_double(long long __a) {
  const double __ret = __nv_longlong_as_double(__a);
  return __ret;
}
// Integer multiply wrappers: __mul24 -> __nv_mul24, __mul64hi -> __nv_mul64hi,
// __mulhi -> __nv_mulhi.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__mul24(int __a, int __b) {
  return __nv_mul24(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
__mul64hi(long long __a, long long __b) {
  const long long __ret = __nv_mul64hi(__a, __b);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__mulhi(int __a, int __b) {
  return __nv_mulhi(__a, __b);
}
// __pm0..__pm3: return the value of the corresponding
// __nvvm_read_ptx_sreg_pm{0..3}() builtin.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__pm0(void) {
  return __nvvm_read_ptx_sreg_pm0();
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__pm1(void) {
  return __nvvm_read_ptx_sreg_pm1();
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__pm2(void) {
  return __nvvm_read_ptx_sreg_pm2();
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__pm3(void) {
  return __nvvm_read_ptx_sreg_pm3();
}
// __popc / __popcll: forward to __nv_popc / __nv_popcll.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__popc(int __a) {
  return __nv_popc(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__popcll(long long __a) {
  return __nv_popcll(__a);
}
// Forwards (__a, __b) to __nv_fast_powf and returns its result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__powf(float __a, float __b) {
  const float __ret = __nv_fast_powf(__a, __b);
  return __ret;
}


// Forwards (__a, __b) to __nv_rhadd.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__rhadd(int __a, int __b) {
  return __nv_rhadd(__a, __b);
}
// Forwards (__a, __b, __c) to __nv_sad and returns its result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__sad(int __a, int __b, unsigned int __c) {
  const unsigned int __ret = __nv_sad(__a, __b, __c);
  return __ret;
}
// Forwards to __nv_saturatef.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__saturatef(float __a) {
  return __nv_saturatef(__a);
}
// __signbitd (double) -> __nv_signbitd; __signbitf (float) -> __nv_signbitf.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__signbitd(double __a) {
  return __nv_signbitd(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__signbitf(float __a) {
  return __nv_signbitf(__a);
}
// Hands (__a, __s, __c) to __nv_fast_sincosf; nothing is returned, results are
// delivered through the __s and __c pointers by the callee.
static __attribute__((device)) __inline__ __attribute__((always_inline)) void
__sincosf(float __a, float *__s, float *__c) {
  __nv_fast_sincosf(__a, __s, __c);
}
// Forwards to __nv_fast_sinf.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__sinf(float __a) {
  return __nv_fast_sinf(__a);
}
// __syncthreads_{and,count,or}: return the result of the __nvvm_bar0_and,
// __nvvm_bar0_popc and __nvvm_bar0_or builtins respectively.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__syncthreads_and(int __a) {
  return __nvvm_bar0_and(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__syncthreads_count(int __a) {
  return __nvvm_bar0_popc(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
__syncthreads_or(int __a) {
  return __nvvm_bar0_or(__a);
}
// Forwards to __nv_fast_tanf.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__tanf(float __a) {
  return __nv_fast_tanf(__a);
}
// Invokes the __nvvm_membar_gl() builtin.
static __attribute__((device)) __inline__ __attribute__((always_inline)) void
__threadfence(void) {
  __nvvm_membar_gl();
}
// Invokes the __nvvm_membar_cta() builtin.
// No ';' follows the closing brace: a trailing one would be a stray empty
// declaration at file scope (diagnosed by -Wextra-semi).
static __attribute__((device)) __inline__ __attribute__((always_inline)) void __threadfence_block(void) { __nvvm_membar_cta(); }
// Invokes the __nvvm_membar_sys() builtin.
// No ';' follows the closing brace: a trailing one would be a stray empty
// declaration at file scope (diagnosed by -Wextra-semi).
static __attribute__((device)) __inline__ __attribute__((always_inline)) void __threadfence_system(void) { __nvvm_membar_sys(); }
// Emits the inline-assembly statement "trap;" (marked volatile).
static __attribute__((device)) __inline__ __attribute__((always_inline)) void
__trap(void) {
  __asm__ __volatile__("trap;");
}
// unsigned int atomic add wrappers, implemented on the int add builtins: the
// pointer is cast to int* and the builtin's int result is returned as
// unsigned int. Plain/_block/_system use the unqualified/"cta"/"sys" forms.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicAdd(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_add_gen_i(__word, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicAdd_block(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_cta_add_gen_i(__word, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicAdd_system(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_sys_add_gen_i(__word, __v);
}
// unsigned int atomic AND wrappers, implemented on the int and builtins via an
// int* cast of the pointer. Plain/_block/_system use the
// unqualified/"cta"/"sys" forms.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicAnd(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_and_gen_i(__word, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicAnd_block(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_cta_and_gen_i(__word, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicAnd_system(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_sys_and_gen_i(__word, __v);
}
// unsigned int compare-and-swap wrappers, implemented on the int cas builtins
// via an int* cast of the pointer. Plain/_block/_system use the
// unqualified/"cta"/"sys" forms.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicCAS(unsigned int *__p, unsigned int __cmp, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_cas_gen_i(__word, __cmp, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicCAS_block(unsigned int *__p, unsigned int __cmp, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_cta_cas_gen_i(__word, __cmp, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicCAS_system(unsigned int *__p, unsigned int __cmp, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_sys_cas_gen_i(__word, __cmp, __v);
}
// unsigned int atomic decrement wrappers. Plain/_block/_system call the
// unqualified/"cta"/"sys" forms of the dec_gen_ui builtin.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicDec(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_dec_gen_ui(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicDec_block(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_cta_dec_gen_ui(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicDec_system(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_sys_dec_gen_ui(__p, __v);
  return __ret;
}
// unsigned int atomic exchange wrappers, implemented on the int xchg builtins
// via an int* cast of the pointer. Plain/_block/_system use the
// unqualified/"cta"/"sys" forms.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicExch(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_xchg_gen_i(__word, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicExch_block(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_cta_xchg_gen_i(__word, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicExch_system(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_sys_xchg_gen_i(__word, __v);
}
// unsigned int atomic increment wrappers. Plain/_block/_system call the
// unqualified/"cta"/"sys" forms of the inc_gen_ui builtin.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicInc(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_inc_gen_ui(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicInc_block(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_cta_inc_gen_ui(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicInc_system(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_sys_inc_gen_ui(__p, __v);
  return __ret;
}
// unsigned int atomic max wrappers. Plain/_block/_system call the
// unqualified/"cta"/"sys" forms of the max_gen_ui builtin.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicMax(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_max_gen_ui(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicMax_block(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_cta_max_gen_ui(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicMax_system(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_sys_max_gen_ui(__p, __v);
  return __ret;
}
// unsigned int atomic min wrappers. Plain/_block/_system call the
// unqualified/"cta"/"sys" forms of the min_gen_ui builtin.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicMin(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_min_gen_ui(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicMin_block(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_cta_min_gen_ui(__p, __v);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicMin_system(unsigned int *__p, unsigned int __v) {
  const unsigned int __ret = __nvvm_atom_sys_min_gen_ui(__p, __v);
  return __ret;
}
// unsigned int atomic OR wrappers, implemented on the int or builtins via an
// int* cast of the pointer. Plain/_block/_system use the
// unqualified/"cta"/"sys" forms.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicOr(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_or_gen_i(__word, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicOr_block(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_cta_or_gen_i(__word, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicOr_system(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_sys_or_gen_i(__word, __v);
}
// unsigned int atomic XOR wrappers, implemented on the int xor builtins via an
// int* cast of the pointer. Plain/_block/_system use the
// unqualified/"cta"/"sys" forms.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicXor(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_xor_gen_i(__word, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicXor_block(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_cta_xor_gen_i(__word, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uAtomicXor_system(unsigned int *__p, unsigned int __v) {
  int *const __word = (int *)__p;
  return __nvvm_atom_sys_xor_gen_i(__word, __v);
}
// Forwards (__a, __b) to __nv_uhadd and returns its result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__uhadd(unsigned int __a, unsigned int __b) {
  const unsigned int __ret = __nv_uhadd(__a, __b);
  return __ret;
}
// Forwards to __nv_uint2double_rn and returns its result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__uint2double_rn(unsigned int __a) {
  const double __ret = __nv_uint2double_rn(__a);
  return __ret;
}
// __uint2float_{rd,rn,ru,rz}: forward to the matching __nv_uint2float_*
// routine and return its result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__uint2float_rd(unsigned int __a) {
  const float __ret = __nv_uint2float_rd(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__uint2float_rn(unsigned int __a) {
  const float __ret = __nv_uint2float_rn(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__uint2float_ru(unsigned int __a) {
  const float __ret = __nv_uint2float_ru(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__uint2float_rz(unsigned int __a) {
  const float __ret = __nv_uint2float_rz(__a);
  return __ret;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float __uint_as_float(unsigned int __a) {
  return __nv_uint_as_float(__a);
}
// unsigned long long -> double conversions. Each wrapper forwards to the
// `__nv_ull2double_*` function with the same suffix.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__ull2double_rd(unsigned long long __a) {
  return __nv_ull2double_rd(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__ull2double_rn(unsigned long long __a) {
  return __nv_ull2double_rn(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__ull2double_ru(unsigned long long __a) {
  return __nv_ull2double_ru(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
__ull2double_rz(unsigned long long __a) {
  return __nv_ull2double_rz(__a);
}
// unsigned long long -> float conversions. Each wrapper forwards to the
// `__nv_ull2float_*` function with the same suffix.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__ull2float_rd(unsigned long long __a) {
  return __nv_ull2float_rd(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__ull2float_rn(unsigned long long __a) {
  return __nv_ull2float_rn(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__ull2float_ru(unsigned long long __a) {
  return __nv_ull2float_ru(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
__ull2float_rz(unsigned long long __a) {
  return __nv_ull2float_rz(__a);
}
// 64-bit unsigned atomic ADD / AND wrappers. The pointer is reinterpreted as
// `long long *` and handed to the matching `__nvvm_atom_*_gen_ll` builtin
// (`cta` infix for `_block`, `sys` infix for `_system`); the builtin's result
// is returned converted to unsigned long long.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicAdd(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_add_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicAdd_block(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_cta_add_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicAdd_system(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_sys_add_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicAnd(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_and_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicAnd_block(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_cta_and_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicAnd_system(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_sys_and_gen_ll(__addr, __v);
}
// 64-bit unsigned compare-and-swap wrappers. Arguments are forwarded in the
// order (address, __cmp, __v) to the `__nvvm_atom_*cas_gen_ll` builtins after
// reinterpreting the pointer as `long long *`.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicCAS(unsigned long long *__p, unsigned long long __cmp,
               unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_cas_gen_ll(__addr, __cmp, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicCAS_block(unsigned long long *__p, unsigned long long __cmp,
                     unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_cta_cas_gen_ll(__addr, __cmp, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicCAS_system(unsigned long long *__p, unsigned long long __cmp,
                      unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_sys_cas_gen_ll(__addr, __cmp, __v);
}
// 64-bit unsigned atomic exchange wrappers; forward to the
// `__nvvm_atom_*xchg_gen_ll` builtins with the pointer viewed as `long long *`.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicExch(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_xchg_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicExch_block(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_cta_xchg_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicExch_system(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_sys_xchg_gen_ll(__addr, __v);
}
// 64-bit unsigned atomic MAX / MIN wrappers. Unlike the sibling add/and/or
// wrappers these call the `_ull` builtins, so the pointer is passed through
// without a cast.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicMax(unsigned long long *__p, unsigned long long __v) {
  unsigned long long __res = __nvvm_atom_max_gen_ull(__p, __v);
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicMax_block(unsigned long long *__p, unsigned long long __v) {
  unsigned long long __res = __nvvm_atom_cta_max_gen_ull(__p, __v);
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicMax_system(unsigned long long *__p, unsigned long long __v) {
  unsigned long long __res = __nvvm_atom_sys_max_gen_ull(__p, __v);
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicMin(unsigned long long *__p, unsigned long long __v) {
  unsigned long long __res = __nvvm_atom_min_gen_ull(__p, __v);
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicMin_block(unsigned long long *__p, unsigned long long __v) {
  unsigned long long __res = __nvvm_atom_cta_min_gen_ull(__p, __v);
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicMin_system(unsigned long long *__p, unsigned long long __v) {
  unsigned long long __res = __nvvm_atom_sys_min_gen_ull(__p, __v);
  return __res;
}
// 64-bit unsigned atomic OR / XOR wrappers; forward to the
// `__nvvm_atom_*_gen_ll` builtins with the pointer viewed as `long long *`.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicOr(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_or_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicOr_block(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_cta_or_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicOr_system(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_sys_or_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicXor(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_xor_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicXor_block(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_cta_xor_gen_ll(__addr, __v);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__ullAtomicXor_system(unsigned long long *__p, unsigned long long __v) {
  long long *__addr = (long long *)__p;
  return __nvvm_atom_sys_xor_gen_ll(__addr, __v);
}
// Unsigned integer arithmetic helpers. Each forwards its arguments, in order,
// to the identically named `__nv_*` function.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__umul24(unsigned int __a, unsigned int __b) {
  unsigned int __res = __nv_umul24(__a, __b);
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned long long
__umul64hi(unsigned long long __a, unsigned long long __b) {
  unsigned long long __res = __nv_umul64hi(__a, __b);
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__umulhi(unsigned int __a, unsigned int __b) {
  unsigned int __res = __nv_umulhi(__a, __b);
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__urhadd(unsigned int __a, unsigned int __b) {
  unsigned int __res = __nv_urhadd(__a, __b);
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__usad(unsigned int __a, unsigned int __b, unsigned int __c) {
  unsigned int __res = __nv_usad(__a, __b, __c);
  return __res;
}
# 1051 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_device_functions.h" 3
// Computes (__a << shift) - __a in unsigned (mod 2^32) arithmetic. For an
// input whose set bits are spaced `shift` apart (e.g. 0x00010001 with
// shift == 16) this turns each set bit into a run of `shift` ones
// (0xFFFFFFFF in that example); the __vcmp* helpers in this file use it with
// shift 16 or 8 on the results of the __vset* helpers.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__bool2mask(unsigned int __a, int shift) {
  unsigned int __shifted = __a << shift;
  return __shifted - __a;
}
// Issues the PTX `vabsdiff2` / `vabsdiff4` instruction (signed operand types)
// with __a as the first source and literal 0 for the other two operands, and
// returns the destination register.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vabs2(unsigned int __a) {
  unsigned int __res;
  __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(0), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vabs4(unsigned int __a) {
  unsigned int __res;
  __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(0), "r"(0));
  return __res;
}
// Issue PTX `vabsdiff2` / `vabsdiff4` on (__a, __b, 0). The `s` variants use
// the `.s32.s32.s32` operand types, the `u` variants `.u32.u32.u32`.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vabsdiffs2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vabsdiff2.s32.s32.s32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}

static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vabsdiffs4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vabsdiff4.s32.s32.s32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vabsdiffu2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vabsdiff2.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vabsdiffu4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vabsdiff4.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
// Same instruction form as __vabs2 / __vabs4 but with the `.sat` modifier
// appended: `vabsdiffN.s32.s32.s32.sat` on (__a, 0, 0).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vabsss2(unsigned int __a) {
  unsigned int __res;
  __asm__("vabsdiff2.s32.s32.s32.sat %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(0), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vabsss4(unsigned int __a) {
  unsigned int __res;
  __asm__("vabsdiff4.s32.s32.s32.sat %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(0), "r"(0));
  return __res;
}
// Packed add wrappers: issue PTX `vadd2` / `vadd4` on (__a, __b, 0).
//   __vadd2 / __vadd4     -> `.u32.u32.u32`
//   __vaddss2 / __vaddss4 -> `.s32.s32.s32.sat`
//   __vaddus2 / __vaddus4 -> `.u32.u32.u32.sat`
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vadd2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vadd2.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vadd4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vadd4.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vaddss2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vadd2.s32.s32.s32.sat %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vaddss4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vadd4.s32.s32.s32.sat %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vaddus2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vadd2.u32.u32.u32.sat %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vaddus4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vadd4.u32.u32.u32.sat %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
// Packed average wrappers: issue PTX `vavrg2` / `vavrg4` on (__a, __b, 0),
// with signed (`s`) or unsigned (`u`) operand types per the function name.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vavgs2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vavrg2.s32.s32.s32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vavgs4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vavrg4.s32.s32.s32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vavgu2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vavrg2.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vavgu4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vavrg4.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
// Packed equality compare. __vseteqN issues PTX `vsetN.u32.u32.eq` on
// (__a, __b, 0); __vcmpeqN feeds that result through __bool2mask with a
// shift of 16 (N == 2) or 8 (N == 4).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vseteq2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset2.u32.u32.eq %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpeq2(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vseteq2(__a, __b);
  return __bool2mask(__flags, 16);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vseteq4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset4.u32.u32.eq %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpeq4(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vseteq4(__a, __b);
  return __bool2mask(__flags, 8);
}
// Packed ">=" compare, signed (`ges`) and unsigned (`geu`). __vsetge*N issues
// PTX `vsetN.{s32.s32|u32.u32}.ge` on (__a, __b, 0); __vcmpge*N passes that
// result through __bool2mask with shift 16 (N == 2) or 8 (N == 4).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetges2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset2.s32.s32.ge %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpges2(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetges2(__a, __b);
  return __bool2mask(__flags, 16);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetges4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset4.s32.s32.ge %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpges4(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetges4(__a, __b);
  return __bool2mask(__flags, 8);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetgeu2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset2.u32.u32.ge %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpgeu2(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetgeu2(__a, __b);
  return __bool2mask(__flags, 16);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetgeu4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset4.u32.u32.ge %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpgeu4(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetgeu4(__a, __b);
  return __bool2mask(__flags, 8);
}
// Packed ">" compare, signed (`gts`) and unsigned (`gtu`). __vsetgt*N issues
// PTX `vsetN.{s32.s32|u32.u32}.gt` on (__a, __b, 0); __vcmpgt*N passes that
// result through __bool2mask with shift 16 (N == 2) or 8 (N == 4).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetgts2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset2.s32.s32.gt %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpgts2(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetgts2(__a, __b);
  return __bool2mask(__flags, 16);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetgts4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset4.s32.s32.gt %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpgts4(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetgts4(__a, __b);
  return __bool2mask(__flags, 8);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetgtu2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset2.u32.u32.gt %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpgtu2(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetgtu2(__a, __b);
  return __bool2mask(__flags, 16);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetgtu4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset4.u32.u32.gt %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpgtu4(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetgtu4(__a, __b);
  return __bool2mask(__flags, 8);
}
// Packed "<=" compare, signed (`les`) and unsigned (`leu`). __vsetle*N issues
// PTX `vsetN.{s32.s32|u32.u32}.le` on (__a, __b, 0); __vcmple*N passes that
// result through __bool2mask with shift 16 (N == 2) or 8 (N == 4).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetles2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset2.s32.s32.le %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmples2(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetles2(__a, __b);
  return __bool2mask(__flags, 16);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetles4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset4.s32.s32.le %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmples4(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetles4(__a, __b);
  return __bool2mask(__flags, 8);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetleu2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset2.u32.u32.le %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpleu2(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetleu2(__a, __b);
  return __bool2mask(__flags, 16);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetleu4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset4.u32.u32.le %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpleu4(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetleu4(__a, __b);
  return __bool2mask(__flags, 8);
}
// Packed "<" compare, signed (`lts`) and unsigned (`ltu`). __vsetlt*N issues
// PTX `vsetN.{s32.s32|u32.u32}.lt` on (__a, __b, 0); __vcmplt*N passes that
// result through __bool2mask with shift 16 (N == 2) or 8 (N == 4).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetlts2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset2.s32.s32.lt %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmplts2(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetlts2(__a, __b);
  return __bool2mask(__flags, 16);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetlts4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset4.s32.s32.lt %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmplts4(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetlts4(__a, __b);
  return __bool2mask(__flags, 8);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetltu2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset2.u32.u32.lt %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpltu2(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetltu2(__a, __b);
  return __bool2mask(__flags, 16);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetltu4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset4.u32.u32.lt %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpltu4(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetltu4(__a, __b);
  return __bool2mask(__flags, 8);
}
// Packed "!=" compare. __vsetneN issues PTX `vsetN.u32.u32.ne` on
// (__a, __b, 0); __vcmpneN passes that result through __bool2mask with
// shift 16 (N == 2) or 8 (N == 4).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetne2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset2.u32.u32.ne %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpne2(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetne2(__a, __b);
  return __bool2mask(__flags, 16);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsetne4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vset4.u32.u32.ne %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vcmpne4(unsigned int __a, unsigned int __b) {
  unsigned int __flags = __vsetne4(__a, __b);
  return __bool2mask(__flags, 8);
}


// Packed unsigned halving add computed in plain integer arithmetic:
//   ((a ^ b) >> 1, with the top bit of every lane cleared) + (a & b)
// Clearing the top bit of each lane after the shift discards the bit that
// was shifted in from the neighbouring lane. Lanes are 16 bits wide in
// __vhaddu2 (mask 0x80008000) and 8 bits wide in __vhaddu4 (mask 0x80808080).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vhaddu2(unsigned int __a, unsigned int __b) {
  unsigned int __half_diff = ((__a ^ __b) >> 1) & ~0x80008000u;
  unsigned int __common = __a & __b;
  return __half_diff + __common;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vhaddu4(unsigned int __a, unsigned int __b) {
  unsigned int __half_diff = ((__a ^ __b) >> 1) & ~0x80808080u;
  unsigned int __common = __a & __b;
  return __half_diff + __common;
}

// Packed signed max on two 16-bit lanes. Normally issues PTX
// `vmax2.s32.s32.s32` on (__a, __b, 0). When bit 15 is set in BOTH inputs the
// instruction is bypassed: a mask from __vcmpgts2(__a, __b) selects __a's bits
// where the mask is set and __b's bits elsewhere.
// NOTE(review): the bypass looks like a toolchain workaround for the case
// where both low halfwords are negative; the explanatory comment that
// presumably sat in the branch is gone from this preprocessed output —
// confirm against the upstream header before touching it.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vmaxs2(unsigned int __a, unsigned int __b) {
  const bool __low_sign_in_both = (__a & 0x8000) && (__b & 0x8000);
  if (!__low_sign_in_both) {
    unsigned int __res;
    __asm__("vmax2.s32.s32.s32 %0,%1,%2,%3;"
            : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
    return __res;
  }
  unsigned __pick_a = __vcmpgts2(__a, __b);
  return (__a & __pick_a) | (__b & ~__pick_a);
}
// Packed max wrappers: issue PTX `vmax4` (signed), `vmax2` (unsigned) and
// `vmax4` (unsigned) respectively on (__a, __b, 0).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vmaxs4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vmax4.s32.s32.s32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vmaxu2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vmax2.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vmaxu4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vmax4.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
// Packed min wrappers: issue PTX `vmin2` / `vmin4` on (__a, __b, 0), with
// signed (`s`) or unsigned (`u`) operand types per the function name.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vmins2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vmin2.s32.s32.s32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vmins4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vmin4.s32.s32.s32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vminu2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vmin2.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vminu4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vmin4.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
// Issue PTX `vabsdiff2` / `vabsdiff4` with the `.add` modifier on
// (__a, __b, 0); signed (`s`) or unsigned (`u`) operand types per the name.
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsads2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vabsdiff2.s32.s32.s32.add %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsads4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vabsdiff4.s32.s32.s32.add %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsadu2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vabsdiff2.u32.u32.u32.add %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsadu4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vabsdiff4.u32.u32.u32.add %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}

// Packed subtract: __vsubN issues PTX `vsubN.u32.u32.u32` on (__a, __b, 0).
// __vnegN is defined as __vsubN(0, __a).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsub2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vsub2.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vneg2(unsigned int __a) {
  return __vsub2(0, __a);
}

static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsub4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vsub4.u32.u32.u32 %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vneg4(unsigned int __a) {
  return __vsub4(0, __a);
}
// Packed signed subtract with the `.sat` modifier: __vsubssN issues PTX
// `vsubN.s32.s32.s32.sat` on (__a, __b, 0). __vnegssN is __vsubssN(0, __a).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsubss2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vsub2.s32.s32.s32.sat %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vnegss2(unsigned int __a) {
  unsigned int __res = __vsubss2(0, __a);
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsubss4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vsub4.s32.s32.s32.sat %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vnegss4(unsigned int __a) {
  unsigned int __res = __vsubss4(0, __a);
  return __res;
}
// Packed unsigned subtract with the `.sat` modifier: issues PTX
// `vsubN.u32.u32.u32.sat` on (__a, __b, 0).
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsubus2(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vsub2.u32.u32.u32.sat %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) unsigned int
__vsubus4(unsigned int __a, unsigned int __b) {
  unsigned int __res;
  __asm__("vsub4.u32.u32.u32.sat %0,%1,%2,%3;"
          : "=r"(__res) : "r"(__a), "r"(__b), "r"(0));
  return __res;
}


// Device-side clock readers. clock() returns the value of the
// `__nvvm_read_ptx_sreg_clock` builtin as an int; clock64() returns the value
// of `__nvvm_read_ptx_sreg_clock64` as a long long.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
clock() {
  return __nvvm_read_ptx_sreg_clock();
}

static __attribute__((device)) __inline__ __attribute__((always_inline)) long long
clock64() {
  return __nvvm_read_ptx_sreg_clock64();
}


// Device-side memcpy / memset: both delegate directly to the compiler
// builtins `__builtin_memcpy` / `__builtin_memset` and return their result.
static __attribute__((device)) __inline__ __attribute__((always_inline)) void *
memcpy(void *__a, const void *__b, size_t __c) {
  void *__dst = __builtin_memcpy(__a, __b, __c);
  return __dst;
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) void *
memset(void *__a, int __b, size_t __c) {
  void *__dst = __builtin_memset(__a, __b, __c);
  return __dst;
}
# 158 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_math.h" 1 3
# 56 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_math.h" 3
// Device math wrappers (abs .. ceilf). Every function forwards its
// argument(s) unchanged to the `__nv_`-prefixed function of the same name and
// returns that result; no extra logic is added here.
static __attribute__((device)) __inline__ __attribute__((always_inline)) int
abs(int __a) {
  return __nv_abs(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
fabs(double __a) {
  return __nv_fabs(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
acos(double __a) {
  return __nv_acos(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
acosf(float __a) {
  return __nv_acosf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
acosh(double __a) {
  return __nv_acosh(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
acoshf(float __a) {
  return __nv_acoshf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
asin(double __a) {
  return __nv_asin(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
asinf(float __a) {
  return __nv_asinf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
asinh(double __a) {
  return __nv_asinh(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
asinhf(float __a) {
  return __nv_asinhf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
atan(double __a) {
  return __nv_atan(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
atan2(double __a, double __b) {
  return __nv_atan2(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
atan2f(float __a, float __b) {
  return __nv_atan2f(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
atanf(float __a) {
  return __nv_atanf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
atanh(double __a) {
  return __nv_atanh(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
atanhf(float __a) {
  return __nv_atanhf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
cbrt(double __a) {
  return __nv_cbrt(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
cbrtf(float __a) {
  return __nv_cbrtf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) double
ceil(double __a) {
  return __nv_ceil(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline)) float
ceilf(float __a) {
  return __nv_ceilf(__a);
}
// Device-side forwarders: copysign, cosine family and modified cylindrical
// Bessel functions. Every wrapper returns the result of the identically
// named __nv_* routine called with the same arguments.
static __attribute__((device)) __inline__ __attribute__((always_inline))
double copysign(double __a, double __b) {
  return __nv_copysign(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float copysignf(float __a, float __b) {
  return __nv_copysignf(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double cos(double __a) {
  return __nv_cos(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float cosf(float __a) {
  return __nv_cosf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double cosh(double __a) {
  return __nv_cosh(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float coshf(float __a) {
  return __nv_coshf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double cospi(double __a) {
  return __nv_cospi(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float cospif(float __a) {
  return __nv_cospif(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double cyl_bessel_i0(double __a) {
  return __nv_cyl_bessel_i0(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float cyl_bessel_i0f(float __a) {
  return __nv_cyl_bessel_i0f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double cyl_bessel_i1(double __a) {
  return __nv_cyl_bessel_i1(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float cyl_bessel_i1f(float __a) {
  return __nv_cyl_bessel_i1f(__a);
}
// Device-side forwarders: error-function family, exponentials, fabsf and
// fdim. Each wrapper calls the __nv_* routine of the same name with the
// arguments it was given and returns that result.
static __attribute__((device)) __inline__ __attribute__((always_inline))
double erf(double __a) {
  return __nv_erf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double erfc(double __a) {
  return __nv_erfc(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float erfcf(float __a) {
  return __nv_erfcf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double erfcinv(double __a) {
  return __nv_erfcinv(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float erfcinvf(float __a) {
  return __nv_erfcinvf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double erfcx(double __a) {
  return __nv_erfcx(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float erfcxf(float __a) {
  return __nv_erfcxf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float erff(float __a) {
  return __nv_erff(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double erfinv(double __a) {
  return __nv_erfinv(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float erfinvf(float __a) {
  return __nv_erfinvf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double exp(double __a) {
  return __nv_exp(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double exp10(double __a) {
  return __nv_exp10(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float exp10f(float __a) {
  return __nv_exp10f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double exp2(double __a) {
  return __nv_exp2(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float exp2f(float __a) {
  return __nv_exp2f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float expf(float __a) {
  return __nv_expf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double expm1(double __a) {
  return __nv_expm1(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float expm1f(float __a) {
  return __nv_expm1f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float fabsf(float __a) {
  return __nv_fabsf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double fdim(double __a, double __b) {
  return __nv_fdim(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float fdimf(float __a, float __b) {
  return __nv_fdimf(__a, __b);
}
// Device-side division helpers: both simply evaluate __a / __b with the
// built-in division operator for their respective floating-point type.
static __attribute__((device)) __inline__ __attribute__((always_inline))
double fdivide(double __a, double __b) {
  return __a / __b;
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float fdividef(float __a, float __b) {
  return __a / __b;
}
// Device-side forwarders: floor, fused multiply-add, fmax/fmin, fmod, frexp,
// hypot, ilogb and the Bessel functions j0/j1/jn. Each wrapper returns the
// value produced by the same-named __nv_* routine for the same arguments.
static __attribute__((device)) __inline__ __attribute__((always_inline))
double floor(double __f) {
  return __nv_floor(__f);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float floorf(float __f) {
  return __nv_floorf(__f);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double fma(double __a, double __b, double __c) {
  return __nv_fma(__a, __b, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float fmaf(float __a, float __b, float __c) {
  return __nv_fmaf(__a, __b, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double fmax(double __a, double __b) {
  return __nv_fmax(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float fmaxf(float __a, float __b) {
  return __nv_fmaxf(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double fmin(double __a, double __b) {
  return __nv_fmin(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float fminf(float __a, float __b) {
  return __nv_fminf(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double fmod(double __a, double __b) {
  return __nv_fmod(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float fmodf(float __a, float __b) {
  return __nv_fmodf(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double frexp(double __a, int *__b) {
  return __nv_frexp(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float frexpf(float __a, int *__b) {
  return __nv_frexpf(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double hypot(double __a, double __b) {
  return __nv_hypot(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float hypotf(float __a, float __b) {
  return __nv_hypotf(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
int ilogb(double __a) {
  return __nv_ilogb(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
int ilogbf(float __a) {
  return __nv_ilogbf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double j0(double __a) {
  return __nv_j0(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float j0f(float __a) {
  return __nv_j0f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double j1(double __a) {
  return __nv_j1(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float j1f(float __a) {
  return __nv_j1f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double jn(int __n, double __a) {
  return __nv_jn(__n, __a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float jnf(int __n, float __a) {
  return __nv_jnf(__n, __a);
}

// Device-side absolute value for `long`. Forwards to __nv_llabs, i.e. the
// long long routine; this matches targets where long is 64 bits wide
// (the file header indicates an nvptx64 build).
// The stray ';' that followed the function body was removed: it formed an
// empty declaration that only produces extra-semicolon diagnostics.
static __attribute__((device)) __inline__ __attribute__((always_inline)) long labs(long __a) { return __nv_llabs(__a); }


// Device-side forwarders: ldexp, lgamma, long long abs/max/min and the
// rounding conversions (llrint, llround, round). Each wrapper returns what
// the same-named __nv_* routine yields for the same arguments.
static __attribute__((device)) __inline__ __attribute__((always_inline))
double ldexp(double __a, int __b) {
  return __nv_ldexp(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float ldexpf(float __a, int __b) {
  return __nv_ldexpf(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double lgamma(double __a) {
  return __nv_lgamma(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float lgammaf(float __a) {
  return __nv_lgammaf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
long long llabs(long long __a) {
  return __nv_llabs(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
long long llmax(long long __a, long long __b) {
  return __nv_llmax(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
long long llmin(long long __a, long long __b) {
  return __nv_llmin(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
long long llrint(double __a) {
  return __nv_llrint(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
long long llrintf(float __a) {
  return __nv_llrintf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
long long llround(double __a) {
  return __nv_llround(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
long long llroundf(float __a) {
  return __nv_llroundf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double round(double __a) {
  return __nv_round(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float roundf(float __a) {
  return __nv_roundf(__a);
}
// Device-side forwarders for the logarithm family. Each wrapper returns the
// result of the same-named __nv_* routine applied to its argument.
static __attribute__((device)) __inline__ __attribute__((always_inline))
double log(double __a) {
  return __nv_log(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double log10(double __a) {
  return __nv_log10(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float log10f(float __a) {
  return __nv_log10f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double log1p(double __a) {
  return __nv_log1p(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float log1pf(float __a) {
  return __nv_log1pf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double log2(double __a) {
  return __nv_log2(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float log2f(float __a) {
  return __nv_log2f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double logb(double __a) {
  return __nv_logb(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float logbf(float __a) {
  return __nv_logbf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float logf(float __a) {
  return __nv_logf(__a);
}

// Device-side `long`-returning rounding helpers. lrint, lround and lroundf
// delegate to the long long variants defined in this file; lrintf calls
// __float2ll_rn directly. The long long result converts implicitly to long.
static __attribute__((device)) __inline__ __attribute__((always_inline))
long lrint(double __a) {
  return llrint(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
long lrintf(float __a) {
  return __float2ll_rn(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
long lround(double __a) {
  return llround(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
long lroundf(float __a) {
  return llroundf(__a);
}


// Device-side forwarders: integer max/min, modf and nextafter go to the
// same-named __nv_* routines; nearbyint/nearbyintf use the compiler
// builtins __builtin_nearbyint/__builtin_nearbyintf instead.
static __attribute__((device)) __inline__ __attribute__((always_inline))
int max(int __a, int __b) {
  return __nv_max(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
int min(int __a, int __b) {
  return __nv_min(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double modf(double __a, double *__b) {
  return __nv_modf(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float modff(float __a, float *__b) {
  return __nv_modff(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double nearbyint(double __a) {
  return __builtin_nearbyint(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float nearbyintf(float __a) {
  return __builtin_nearbyintf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double nextafter(double __a, double __b) {
  return __nv_nextafter(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float nextafterf(float __a, float __b) {
  return __nv_nextafterf(__a, __b);
}
// Device-side forwarders for the norm* family (norm, norm3d, norm4d,
// normcdf, normcdfinv and their float variants). Each wrapper passes its
// arguments unchanged to the same-named __nv_* routine.
static __attribute__((device)) __inline__ __attribute__((always_inline))
double norm(int __dim, const double *__t) {
  return __nv_norm(__dim, __t);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double norm3d(double __a, double __b, double __c) {
  return __nv_norm3d(__a, __b, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float norm3df(float __a, float __b, float __c) {
  return __nv_norm3df(__a, __b, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double norm4d(double __a, double __b, double __c, double __d) {
  return __nv_norm4d(__a, __b, __c, __d);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float norm4df(float __a, float __b, float __c, float __d) {
  return __nv_norm4df(__a, __b, __c, __d);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double normcdf(double __a) {
  return __nv_normcdf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float normcdff(float __a) {
  return __nv_normcdff(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double normcdfinv(double __a) {
  return __nv_normcdfinv(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float normcdfinvf(float __a) {
  return __nv_normcdfinvf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float normf(int __dim, const float *__t) {
  return __nv_normf(__dim, __t);
}
// Device-side forwarders: pow/powi, rcbrt, remainder, remquo and rhypot.
// Each wrapper returns the result of the same-named __nv_* routine invoked
// with the arguments it received.
static __attribute__((device)) __inline__ __attribute__((always_inline))
double pow(double __a, double __b) {
  return __nv_pow(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float powf(float __a, float __b) {
  return __nv_powf(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double powi(double __a, int __b) {
  return __nv_powi(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float powif(float __a, int __b) {
  return __nv_powif(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double rcbrt(double __a) {
  return __nv_rcbrt(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float rcbrtf(float __a) {
  return __nv_rcbrtf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double remainder(double __a, double __b) {
  return __nv_remainder(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float remainderf(float __a, float __b) {
  return __nv_remainderf(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double remquo(double __a, double __b, int *__c) {
  return __nv_remquo(__a, __b, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float remquof(float __a, float __b, int *__c) {
  return __nv_remquof(__a, __b, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double rhypot(double __a, double __b) {
  return __nv_rhypot(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float rhypotf(float __a, float __b) {
  return __nv_rhypotf(__a, __b);
}

// Device-side forwarders: rint/rintf use the compiler builtins
// __builtin_rint/__builtin_rintf; the rnorm* family, rsqrt and scalbn call
// the same-named __nv_* routines with unchanged arguments.
static __attribute__((device)) __inline__ __attribute__((always_inline))
double rint(double __a) {
  return __builtin_rint(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float rintf(float __a) {
  return __builtin_rintf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double rnorm(int __a, const double *__b) {
  return __nv_rnorm(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double rnorm3d(double __a, double __b, double __c) {
  return __nv_rnorm3d(__a, __b, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float rnorm3df(float __a, float __b, float __c) {
  return __nv_rnorm3df(__a, __b, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double rnorm4d(double __a, double __b, double __c, double __d) {
  return __nv_rnorm4d(__a, __b, __c, __d);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float rnorm4df(float __a, float __b, float __c, float __d) {
  return __nv_rnorm4df(__a, __b, __c, __d);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float rnormf(int __dim, const float *__t) {
  return __nv_rnormf(__dim, __t);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double rsqrt(double __a) {
  return __nv_rsqrt(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float rsqrtf(float __a) {
  return __nv_rsqrtf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double scalbn(double __a, int __b) {
  return __nv_scalbn(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float scalbnf(float __a, int __b) {
  return __nv_scalbnf(__a, __b);
}
// Device-side scalbln: scales __a by 2 raised to the power __b, where the
// exponent is a long.
//
// Exponents that fit in an int are forwarded to scalbn. Exponents outside the
// int range (2147483647 and (-2147483647 -1) are the expanded int limits) are
// resolved here:
//   * zero, infinity and NaN are unchanged by scaling, so __a is returned
//     as-is. The previous code returned -inf for scalbln(0.0, big), 0.0 for
//     scalbln(inf, very negative) and -inf / -0.0 for NaN inputs.
//   * any other value overflows to an infinity (positive exponent) or
//     underflows to a zero (negative exponent), carrying the sign of __a.
static __attribute__((device)) __inline__ __attribute__((always_inline)) double scalbln(double __a, long __b) {
  if (__b > 2147483647 || __b < (-2147483647 -1)) {
    // Fixed points of power-of-two scaling: return the input unchanged.
    if (__a == 0.0 || __builtin_isnan(__a) || __builtin_isinf(__a))
      return __a;
    if (__b > 0)
      return __a > 0 ? (__builtin_huge_val ()) : -(__builtin_huge_val ());
    return __a > 0 ? 0.0 : -0.0;
  }
  return scalbn(__a, (int)__b);
}
// Device-side scalblnf: scales __a by 2 raised to the power __b, where the
// exponent is a long.
//
// Exponents that fit in an int are forwarded to scalbnf. Exponents outside
// the int range (2147483647 and (-2147483647 -1) are the expanded int limits)
// are resolved here:
//   * zero, infinity and NaN are unchanged by scaling, so __a is returned
//     as-is. The previous code returned -inf for scalblnf(0.f, big), 0.f for
//     scalblnf(inf, very negative) and -inf / -0.f for NaN inputs.
//   * any other value overflows to an infinity (positive exponent) or
//     underflows to a zero (negative exponent), carrying the sign of __a.
static __attribute__((device)) __inline__ __attribute__((always_inline)) float scalblnf(float __a, long __b) {
  if (__b > 2147483647 || __b < (-2147483647 -1)) {
    // Fixed points of power-of-two scaling: return the input unchanged.
    if (__a == 0.f || __builtin_isnan(__a) || __builtin_isinf(__a))
      return __a;
    if (__b > 0)
      return __a > 0 ? (__builtin_huge_valf ()) : -(__builtin_huge_valf ());
    return __a > 0 ? 0.f : -0.f;
  }
  return scalbnf(__a, (int)__b);
}
// Device-side forwarders: sine family (including the sincos* variants that
// write through two output pointers), sqrt, tangent family, tgamma and trunc.
// Each wrapper calls the same-named __nv_* routine with unchanged arguments.
static __attribute__((device)) __inline__ __attribute__((always_inline))
double sin(double __a) {
  return __nv_sin(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
void sincos(double __a, double *__s, double *__c) {
  return __nv_sincos(__a, __s, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
void sincosf(float __a, float *__s, float *__c) {
  return __nv_sincosf(__a, __s, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
void sincospi(double __a, double *__s, double *__c) {
  return __nv_sincospi(__a, __s, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
void sincospif(float __a, float *__s, float *__c) {
  return __nv_sincospif(__a, __s, __c);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float sinf(float __a) {
  return __nv_sinf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double sinh(double __a) {
  return __nv_sinh(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float sinhf(float __a) {
  return __nv_sinhf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double sinpi(double __a) {
  return __nv_sinpi(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float sinpif(float __a) {
  return __nv_sinpif(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double sqrt(double __a) {
  return __nv_sqrt(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float sqrtf(float __a) {
  return __nv_sqrtf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double tan(double __a) {
  return __nv_tan(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float tanf(float __a) {
  return __nv_tanf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double tanh(double __a) {
  return __nv_tanh(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float tanhf(float __a) {
  return __nv_tanhf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double tgamma(double __a) {
  return __nv_tgamma(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float tgammaf(float __a) {
  return __nv_tgammaf(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double trunc(double __a) {
  return __nv_trunc(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float truncf(float __a) {
  return __nv_truncf(__a);
}
// Device-side forwarders: unsigned max/min (32- and 64-bit operands) and the
// Bessel functions y0/y1/yn. Each wrapper returns the result of the
// same-named __nv_* routine invoked with unchanged arguments.
static __attribute__((device)) __inline__ __attribute__((always_inline))
unsigned long long ullmax(unsigned long long __a, unsigned long long __b) {
  return __nv_ullmax(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
unsigned long long ullmin(unsigned long long __a, unsigned long long __b) {
  return __nv_ullmin(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
unsigned int umax(unsigned int __a, unsigned int __b) {
  return __nv_umax(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
unsigned int umin(unsigned int __a, unsigned int __b) {
  return __nv_umin(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double y0(double __a) {
  return __nv_y0(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float y0f(float __a) {
  return __nv_y0f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double y1(double __a) {
  return __nv_y1(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float y1f(float __a) {
  return __nv_y1f(__a);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
double yn(int __a, double __b) {
  return __nv_yn(__a, __b);
}
static __attribute__((device)) __inline__ __attribute__((always_inline))
float ynf(int __a, float __b) {
  return __nv_ynf(__a, __b);
}
# 159 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 209 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 1 3
# 1188 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 3
# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 1189 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 2 3
# 1 "/usr/local/cuda-11.7/include/math_constants.h" 1 3
# 1190 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 2 3
# 1200 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 3
# 1 "/usr/local/cuda-11.7/include/crt/func_macro.h" 1 3
# 1201 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 2 3
# 2944 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 3
// Reciprocal square root without a device attribute: returns 1.0 / sqrt(a).
inline double rsqrt(const double a)
{
  const double root = sqrt(a);
  return 1.0 / root;
}

// Reciprocal cube root without a device attribute.
//   * NaN input returns a + a (still NaN).
//   * Zero or infinite input returns 1.0 / a.
//   * Otherwise an estimate exp2(-1/3 * log2(|a|)) is refined by one
//     correction step, and the sign of `a` is applied to the result.
inline double rcbrt(const double a)
{
  if (__isnan(a)) {
    return a + a;
  }
  if (a == 0.0 || __isinf(a)) {
    return 1.0 / a;
  }

  const double mag = fabs(a);
  double est = exp2(-3.3333333333333333e-1 * log2(mag));
  // Single refinement of the estimate; expression kept in its original shape.
  est = ((est*est) * (-mag*est) + 1.0) * (3.3333333333333333e-1*est) + est;

  return __signbit(a) ? -est : est;
}

// sinpi without a device attribute; by its name and arithmetic it evaluates
// sin(pi * a).
//   * NaN returns a + a; zero and infinities are handed to sin().
//   * Integral inputs return a divided by 1.0e308 three times.
//   * Otherwise a is reduced by the nearest multiple of 0.5, scaled by pi,
//     and the low two bits of round(2a) choose between sin/cos and the sign.
inline double sinpi(double a)
{
  if (__isnan(a)) {
    return a + a;
  }
  if (a == 0.0 || __isinf(a)) {
    return sin (a);
  }
  if (a == floor(a)) {
    return ((a / 1.0e308) / 1.0e308) / 1.0e308;
  }

  const double doubled = a + a;
  const double nearest = round(doubled);
  const long long int whole = (long long int)nearest;
  const int quadrant = (int)whole;

  // Reduced argument, then scaled by pi (kept as two separate statements).
  a -= nearest * 0.5;
  a = a * 3.1415926535897931e+0;

  double result = (quadrant & 1) ? cos (a) : sin (a);
  if (quadrant & 2) {
    result = -result;
  }
  return result;
}

// cospi without a device attribute; by its name and arithmetic it evaluates
// cos(pi * a).
//   * NaN returns a + a; infinities are handed to cos().
//   * Inputs with magnitude above 9.0071992547409920e+015 are treated as 0.
//   * Otherwise a is reduced by the nearest multiple of 0.5, scaled by pi,
//     and the low two bits of round(2a) + 1 choose between sin/cos and the
//     sign. A zero result is returned with a positive sign.
inline double cospi(double a)
{
  if (__isnan(a)) {
    return a + a;
  }
  if (__isinf(a)) {
    return cos (a);
  }
  if (fabs(a) > 9.0071992547409920e+015) {
    a = 0.0;
  }

  const double doubled = a + a;
  const double nearest = round(doubled);
  const long long int whole = (long long int)nearest;
  int quadrant = (int)whole;

  // Reduced argument, then scaled by pi (kept as two separate statements).
  a -= nearest * 0.5;
  a = a * 3.1415926535897931e+0;

  // Shift by one quarter period relative to the sine selection.
  ++quadrant;

  double result = (quadrant & 1) ? cos (a) : sin (a);
  if (quadrant & 2) {
    result = -result;
  }
  if (result == 0.0) {
    result = fabs(result);
  }
  return result;
}

// Stores sinpi(a) through sptr, then cospi(a) through cptr (in that order).
inline void sincospi(const double a, double *sptr, double *cptr)
{
  const double sine = sinpi(a);
  *sptr = sine;
  const double cosine = cospi(a);
  *cptr = cosine;
}

// erfinv without a device attribute; presumably the inverse error function
// (per its name). The branch is chosen by fa = |a|:
//   fa >= 1.0    : result is the NaN bit pattern 0xfff8000000000000, except
//                  for fa == 1.0, where it is a * exp(1000.0).
//   fa >= 0.9375 : rational approximation p / (q * t) in
//                  t = 1 / sqrt(-log1p(-fa)); negated for negative a.
//   fa >= 0.75   : a * p/q with p, q polynomials in t = a*a - .87890625.
//   otherwise    : a * p/q with p, q polynomials in t = a*a - .5625.
// A NaN input fails every comparison and reaches the last branch, where the
// arithmetic on `a` yields NaN.
inline double erfinv(const double a)
{
  double p, q, t, fa;
  unsigned long long int l;

  fa = fabs(a);
  if (fa >= 1.0) {
    // Build a NaN from its bit pattern; overwritten below when |a| == 1.
    l = 0xfff8000000000000ULL;
    memcpy(&t, &l, sizeof(double));
    if (fa == 1.0) {
      // exp(1000.0) exceeds the double range, so the product carries the
      // sign of a with an infinite magnitude.
      t = a * exp(1000.0);
    }
  } else if (fa >= 0.9375) {

    // Tail region: p and q are evaluated by Horner's scheme in t.

    t = log1p(-fa);
    t = 1.0 / sqrt(-t);
    p = 2.7834010353747001060e-3;
    p = p * t + 8.6030097526280260580e-1;
    p = p * t + 2.1371214997265515515e+0;
    p = p * t + 3.1598519601132090206e+0;
    p = p * t + 3.5780402569085996758e+0;
    p = p * t + 1.5335297523989890804e+0;
    p = p * t + 3.4839207139657522572e-1;
    p = p * t + 5.3644861147153648366e-2;
    p = p * t + 4.3836709877126095665e-3;
    p = p * t + 1.3858518113496718808e-4;
    p = p * t + 1.1738352509991666680e-6;
    q = t + 2.2859981272422905412e+0;
    q = q * t + 4.3859045256449554654e+0;
    q = q * t + 4.6632960348736635331e+0;
    q = q * t + 3.9846608184671757296e+0;
    q = q * t + 1.6068377709719017609e+0;
    q = q * t + 3.5609087305900265560e-1;
    q = q * t + 5.3963550303200816744e-2;
    q = q * t + 4.3873424022706935023e-3;
    q = q * t + 1.3858762165532246059e-4;
    q = q * t + 1.1738313872397777529e-6;
    t = p / (q * t);
    // The approximation used |a|; restore the sign of the input.
    if (a < 0.0) t = -t;
  } else if (fa >= 0.75) {

    // Middle region: polynomials in t = a^2 - .87890625; result is a * p/q.

    t = a * a - .87890625;
    p = .21489185007307062000e+0;
    p = p * t - .64200071507209448655e+1;
    p = p * t + .29631331505876308123e+2;
    p = p * t - .47644367129787181803e+2;
    p = p * t + .34810057749357500873e+2;
    p = p * t - .12954198980646771502e+2;
    p = p * t + .25349389220714893917e+1;
    p = p * t - .24758242362823355486e+0;
    p = p * t + .94897362808681080020e-2;
    q = t - .12831383833953226499e+2;
    q = q * t + .41409991778428888716e+2;
    q = q * t - .53715373448862143349e+2;
    q = q * t + .33880176779595142685e+2;
    q = q * t - .11315360624238054876e+2;
    q = q * t + .20369295047216351160e+1;
    q = q * t - .18611650627372178511e+0;
    q = q * t + .67544512778850945940e-2;
    p = p / q;
    t = a * p;
  } else {

    // Central region: polynomials in t = a^2 - .5625; result is a * p/q.

    t = a * a - .5625;
    p = - .23886240104308755900e+2;
    p = p * t + .45560204272689128170e+3;
    p = p * t - .22977467176607144887e+4;
    p = p * t + .46631433533434331287e+4;
    p = p * t - .43799652308386926161e+4;
    p = p * t + .19007153590528134753e+4;
    p = p * t - .30786872642313695280e+3;
    q = t - .83288327901936570000e+2;
    q = q * t + .92741319160935318800e+3;
    q = q * t - .35088976383877264098e+4;
    q = q * t + .59039348134843665626e+4;
    q = q * t - .48481635430048872102e+4;
    q = q * t + .18997769186453057810e+4;
    q = q * t - .28386514725366621129e+3;
    p = p / q;
    t = a * p;
  }
  return t;
}

// erfcinv without a device attribute; presumably the inverse complementary
// error function (per its name). Branches, in order:
//   NaN          : returns a + a.
//   a <= 0.0     : NaN bit pattern 0xfff8000000000000, except for a == 0.0,
//                  where the result is (1.0 - a) * exp(1000.0).
//   a >= 0.0625  : delegates to erfinv(1.0 - a).
//   a >= 1e-100  : rational approximation p / (q * t) in t = 1/sqrt(-log(a)).
//   otherwise    : a second rational approximation in the same variable t.
inline double erfcinv(const double a)
{
  double t;
  unsigned long long int l;

  if (__isnan(a)) {
    return a + a;
  }
  if (a <= 0.0) {
    // Build a NaN from its bit pattern; overwritten below when a == 0.
    l = 0xfff8000000000000ULL;
    memcpy(&t, &l, sizeof(double));
    if (a == 0.0) {
        // exp(1000.0) exceeds the double range, giving an infinite result.
        t = (1.0 - a) * exp(1000.0);
    }
  }
  else if (a >= 0.0625) {
    // Values above 2.0 make |1.0 - a| exceed 1, which erfinv maps to NaN.
    t = erfinv (1.0 - a);
  }
  else if (a >= 1e-100) {

    // p and q are evaluated by Horner's scheme in t.

    double p, q;
    t = log(a);
    t = 1.0 / sqrt(-t);
    p = 2.7834010353747001060e-3;
    p = p * t + 8.6030097526280260580e-1;
    p = p * t + 2.1371214997265515515e+0;
    p = p * t + 3.1598519601132090206e+0;
    p = p * t + 3.5780402569085996758e+0;
    p = p * t + 1.5335297523989890804e+0;
    p = p * t + 3.4839207139657522572e-1;
    p = p * t + 5.3644861147153648366e-2;
    p = p * t + 4.3836709877126095665e-3;
    p = p * t + 1.3858518113496718808e-4;
    p = p * t + 1.1738352509991666680e-6;
    q = t + 2.2859981272422905412e+0;
    q = q * t + 4.3859045256449554654e+0;
    q = q * t + 4.6632960348736635331e+0;
    q = q * t + 3.9846608184671757296e+0;
    q = q * t + 1.6068377709719017609e+0;
    q = q * t + 3.5609087305900265560e-1;
    q = q * t + 5.3963550303200816744e-2;
    q = q * t + 4.3873424022706935023e-3;
    q = q * t + 1.3858762165532246059e-4;
    q = q * t + 1.1738313872397777529e-6;
    t = p / (q * t);
  }
  else {

    // Inputs below 1e-100: different coefficient set, same variable t.

    double p, q;
    t = log(a);
    t = 1.0 / sqrt(-t);
    p = 6.9952990607058154858e-1;
    p = p * t + 1.9507620287580568829e+0;
    p = p * t + 8.2810030904462690216e-1;
    p = p * t + 1.1279046353630280005e-1;
    p = p * t + 6.0537914739162189689e-3;
    p = p * t + 1.3714329569665128933e-4;
    p = p * t + 1.2964481560643197452e-6;
    p = p * t + 4.6156006321345332510e-9;
    p = p * t + 4.5344689563209398450e-12;
    q = t + 1.5771922386662040546e+0;
    q = q * t + 2.1238242087454993542e+0;
    q = q * t + 8.4001814918178042919e-1;
    q = q * t + 1.1311889334355782065e-1;
    q = q * t + 6.0574830550097140404e-3;
    q = q * t + 1.3715891988350205065e-4;
    q = q * t + 1.2964671850944981713e-6;
    q = q * t + 4.6156017600933592558e-9;
    q = q * t + 4.5344687377088206783e-12;
    t = p / (q * t);
  }
  return t;
}

// Returns -1.4142135623730951 * erfcinv(2a); the doubling is done as a + a.
inline double normcdfinv(const double a)
{
  const double twice = a + a;
  return -1.4142135623730951 * erfcinv(twice);
}

// normcdf without a device attribute. Computes 0.5 * erfc(ah), where ah is
// a * -7.0710678118654757e-1 plus a small correction term; presumably the
// standard normal CDF (per its name) — TODO confirm.
inline double normcdf(double a)
{
  double ah, al, t1, t2, u1, u2, v1, v2, z;
  // Clamp the magnitude of the argument to 38.5, keeping its sign.
  if (fabs (a) > 38.5) a = copysign (38.5, a);
  // Split a into u1 + u2 (u2 = a - u1 by construction).
  // NOTE(review): looks like a high/low split using 134217729.0 = 2^27 + 1.
  ah = a * 134217729.0;
  u1 = (a - ah) + ah;
  u2 = a - u1;
  // v1 and v2 are two-part constants; presumably a high/low pair for the
  // multiplier used in t1 — confirm against the upstream header.
  v1 = -7.0710678398609161e-01;
  v2 = 2.7995440410322203e-09;
  // t1 is the rounded product; t2 accumulates the terms the rounding dropped.
  t1 = a * -7.0710678118654757e-1;
  t2 = (((u1 * v1 - t1) + u1 * v2) + u2 * v1) + u2 * v2;
  t2 = (a * -(-4.8336466567264567e-17)) + t2;
  ah = t1 + t2;
  z = erfc (ah);
  if (a < -1.0) {
    // For a < -1.0, adjust z using al, the part of t1 + t2 not held in ah.
    al = (t1 - ah) + t2;
    t1 = -2.0 * ah * z;
    z = t1 * al + z;
  }
  return 0.5 * z;
}

// erfcx without a device attribute; presumably the scaled complementary
// error function (per its name). Works on x = |a|:
//   x < 32.0 : polynomial in t2 = (x - 4) / (x + 4), divided by (2x + 1).
//   x >= 32.0: polynomial in 1/x^2, multiplied by
//              5.6418958354775628e-001 / x.
// For negative a the result is 2 * exp(x^2) minus the value computed for x.
// NaN input returns a + a.
inline double erfcx(const double a)
{
  double x, t1, t2, t3;

  if (__isnan(a)) {
    return a + a;
  }
  x = fabs(a);
  if (x < 32.0) {
# 3266 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 3
    t1 = x - 4.0;
    t2 = x + 4.0;
    t2 = t1 / t2;

    // Horner evaluation of the polynomial in t2.
    t1 = - 3.5602694826817400E-010;
    t1 = t1 * t2 - 9.7239122591447274E-009;
    t1 = t1 * t2 - 8.9350224851649119E-009;
    t1 = t1 * t2 + 1.0404430921625484E-007;
    t1 = t1 * t2 + 5.8806698585341259E-008;
    t1 = t1 * t2 - 8.2147414929116908E-007;
    t1 = t1 * t2 + 3.0956409853306241E-007;
    t1 = t1 * t2 + 5.7087871844325649E-006;
    t1 = t1 * t2 - 1.1231787437600085E-005;
    t1 = t1 * t2 - 2.4399558857200190E-005;
    t1 = t1 * t2 + 1.5062557169571788E-004;
    t1 = t1 * t2 - 1.9925637684786154E-004;
    t1 = t1 * t2 - 7.5777429182785833E-004;
    t1 = t1 * t2 + 5.0319698792599572E-003;
    t1 = t1 * t2 - 1.6197733895953217E-002;
    t1 = t1 * t2 + 3.7167515553018733E-002;
    t1 = t1 * t2 - 6.6330365827532434E-002;
    t1 = t1 * t2 + 9.3732834997115544E-002;
    t1 = t1 * t2 - 1.0103906603555676E-001;
    t1 = t1 * t2 + 6.8097054254735140E-002;
    t1 = t1 * t2 + 1.5379652102605428E-002;
    t1 = t1 * t2 - 1.3962111684056291E-001;
    t1 = t1 * t2 + 1.2329951186255526E+000;

    // Divide the polynomial value by (2x + 1).
    t2 = 2.0 * x + 1.0;
    t1 = t1 / t2;
  } else {

    // Large x: polynomial in t3 = 1/x^2, scaled by 5.6418958354775628e-001/x.
    t2 = 1.0 / x;
    t3 = t2 * t2;
    t1 = -29.53125;
    t1 = t1 * t3 + 6.5625;
    t1 = t1 * t3 - 1.875;
    t1 = t1 * t3 + 0.75;
    t1 = t1 * t3 - 0.5;
    t1 = t1 * t3 + 1.0;
    t2 = t2 * 5.6418958354775628e-001;
    t1 = t1 * t2;
  }
  if (a < 0.0) {

    // Negative input: exp(x^2) is formed as exp(t2^2) * exp((x-t2)*(x+t2)),
    // with t2 equal to x truncated to a multiple of 1/16, then doubled.
    // NOTE(review): static_cast<int>(x * 16.0) is out of range for int once
    // x * 16.0 reaches 2^31 (including a == -inf) — confirm whether callers
    // can pass such values.
    t2 = (static_cast<int>(x * 16.0)) * 0.0625;
    t3 = (x - t2) * (x + t2);
    t3 = exp(t2 * t2) * exp(t3);
    t3 = t3 + t3;
    t1 = t3 - t1;
  }
  return t1;
}

// Single-precision entry points without a device attribute. Each one widens
// its argument to double, calls the double-precision function of the same
// base name defined in this file, and narrows the result back to float.

inline float rsqrtf(const float a)
{
  const double wide = static_cast<double>(a);
  return static_cast<float>(rsqrt(wide));
}

inline float rcbrtf(const float a)
{
  const double wide = static_cast<double>(a);
  return static_cast<float>(rcbrt(wide));
}

inline float sinpif(const float a)
{
  const double wide = static_cast<double>(a);
  return static_cast<float>(sinpi(wide));
}

inline float cospif(const float a)
{
  const double wide = static_cast<double>(a);
  return static_cast<float>(cospi(wide));
}

// Computes both values in double precision, then stores the narrowed sine
// through sptr followed by the narrowed cosine through cptr.
inline void sincospif(const float a, float *sptr, float *cptr)
{
  double sine_wide, cosine_wide;

  sincospi(static_cast<double>(a), &sine_wide, &cosine_wide);
  *sptr = static_cast<float>(sine_wide);
  *cptr = static_cast<float>(cosine_wide);
}

inline float erfinvf(const float a)
{
  const double wide = static_cast<double>(a);
  return static_cast<float>(erfinv(wide));
}

inline float erfcinvf(const float a)
{
  const double wide = static_cast<double>(a);
  return static_cast<float>(erfcinv(wide));
}

inline float normcdfinvf(const float a)
{
  const double wide = static_cast<double>(a);
  return static_cast<float>(normcdfinv(wide));
}

inline float normcdff(const float a)
{
  const double wide = static_cast<double>(a);
  return static_cast<float>(normcdf(wide));
}

inline float erfcxf(const float a)
{
  const double wide = static_cast<double>(a);
  return static_cast<float>(erfcx(wide));
}
# 210 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 231 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
// float overloads of the un-suffixed math names. Each one simply forwards its
// argument(s) to the matching *f function.

// rsqrt(float) -> rsqrtf
static inline float rsqrt(float __a)
{
  return rsqrtf(__a);
}

// rcbrt(float) -> rcbrtf
static inline float rcbrt(float __a)
{
  return rcbrtf(__a);
}

// sinpi(float) -> sinpif
static inline float sinpi(float __a)
{
  return sinpif(__a);
}

// cospi(float) -> cospif
static inline float cospi(float __a)
{
  return cospif(__a);
}

// sincospi(float, float*, float*) -> sincospif; results are written through
// __b and __c by sincospif.
static inline void sincospi(float __a, float *__b, float *__c)
{
  sincospif(__a, __b, __c);
}

// erfcinv(float) -> erfcinvf
static inline float erfcinv(float __a)
{
  return erfcinvf(__a);
}

// normcdfinv(float) -> normcdfinvf
static inline float normcdfinv(float __a)
{
  return normcdfinvf(__a);
}

// normcdf(float) -> normcdff
static inline float normcdf(float __a)
{
  return normcdff(__a);
}

// erfcx(float) -> erfcxf
static inline float erfcx(float __a)
{
  return erfcxf(__a);
}
# 260 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/local/cuda-11.7/include/device_atomic_functions.h" 1 3
# 76 "/usr/local/cuda-11.7/include/device_atomic_functions.h" 3
// C-linkage declarations of the atomic device builtins operating on int,
// unsigned int and float. Every entry is tagged with the device and
// device_builtin attributes; the atomic* overloads in this translation unit
// forward to these names.
extern "C"
{
extern __attribute__((device)) __attribute__((device_builtin)) int __iAtomicAdd(int *address, int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned int __uAtomicAdd(unsigned int *address, unsigned int val);
extern __attribute__((device)) __attribute__((device_builtin)) int __iAtomicExch(int *address, int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned int __uAtomicExch(unsigned int *address, unsigned int val);
extern __attribute__((device)) __attribute__((device_builtin)) float __fAtomicExch(float *address, float val);
extern __attribute__((device)) __attribute__((device_builtin)) int __iAtomicMin(int *address, int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned int __uAtomicMin(unsigned int *address, unsigned int val);
extern __attribute__((device)) __attribute__((device_builtin)) int __iAtomicMax(int *address, int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned int __uAtomicMax(unsigned int *address, unsigned int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned int __uAtomicInc(unsigned int *address, unsigned int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned int __uAtomicDec(unsigned int *address, unsigned int val);
extern __attribute__((device)) __attribute__((device_builtin)) int __iAtomicAnd(int *address, int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned int __uAtomicAnd(unsigned int *address, unsigned int val);
extern __attribute__((device)) __attribute__((device_builtin)) int __iAtomicOr(int *address, int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned int __uAtomicOr(unsigned int *address, unsigned int val);
extern __attribute__((device)) __attribute__((device_builtin)) int __iAtomicXor(int *address, int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned int __uAtomicXor(unsigned int *address, unsigned int val);
extern __attribute__((device)) __attribute__((device_builtin)) int __iAtomicCAS(int *address, int compare, int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned int __uAtomicCAS(unsigned int *address, unsigned int compare, unsigned int val);
}
# 106 "/usr/local/cuda-11.7/include/device_atomic_functions.h" 3
// Forward declarations of the device-side atomic overloads for int,
// unsigned int and (atomicExch only) float. Each is static inline, so every
// translation unit gets its own copy; the bodies are supplied further down in
// this translation unit.
static __inline__ __attribute__((device)) int atomicAdd(int *address, int val) ;

static __inline__ __attribute__((device)) unsigned int atomicAdd(unsigned int *address, unsigned int val) ;

static __inline__ __attribute__((device)) int atomicSub(int *address, int val) ;

static __inline__ __attribute__((device)) unsigned int atomicSub(unsigned int *address, unsigned int val) ;

static __inline__ __attribute__((device)) int atomicExch(int *address, int val) ;

static __inline__ __attribute__((device)) unsigned int atomicExch(unsigned int *address, unsigned int val) ;

static __inline__ __attribute__((device)) float atomicExch(float *address, float val) ;

static __inline__ __attribute__((device)) int atomicMin(int *address, int val) ;

static __inline__ __attribute__((device)) unsigned int atomicMin(unsigned int *address, unsigned int val) ;

static __inline__ __attribute__((device)) int atomicMax(int *address, int val) ;

static __inline__ __attribute__((device)) unsigned int atomicMax(unsigned int *address, unsigned int val) ;

static __inline__ __attribute__((device)) unsigned int atomicInc(unsigned int *address, unsigned int val) ;

static __inline__ __attribute__((device)) unsigned int atomicDec(unsigned int *address, unsigned int val) ;

static __inline__ __attribute__((device)) int atomicAnd(int *address, int val) ;

static __inline__ __attribute__((device)) unsigned int atomicAnd(unsigned int *address, unsigned int val) ;

static __inline__ __attribute__((device)) int atomicOr(int *address, int val) ;

static __inline__ __attribute__((device)) unsigned int atomicOr(unsigned int *address, unsigned int val) ;

static __inline__ __attribute__((device)) int atomicXor(int *address, int val) ;

static __inline__ __attribute__((device)) unsigned int atomicXor(unsigned int *address, unsigned int val) ;

static __inline__ __attribute__((device)) int atomicCAS(int *address, int compare, int val) ;

static __inline__ __attribute__((device)) unsigned int atomicCAS(unsigned int *address, unsigned int compare, unsigned int val) ;
# 171 "/usr/local/cuda-11.7/include/device_atomic_functions.h" 3
// C-linkage declarations of the unsigned long long atomic builtins
// (add / exchange / compare-and-swap) and of the legacy __any / __all
// builtins. The latter two carry a deprecated attribute whose message points
// to the __any_sync / __all_sync replacements.
extern "C"
{

extern __attribute__((device)) __attribute__((device_builtin)) unsigned long long int __ullAtomicAdd(unsigned long long int *address, unsigned long long int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned long long int __ullAtomicExch(unsigned long long int *address, unsigned long long int val);
extern __attribute__((device)) __attribute__((device_builtin)) unsigned long long int __ullAtomicCAS(unsigned long long int *address, unsigned long long int compare, unsigned long long int val);

extern __attribute__((device)) __attribute__((device_builtin)) __attribute__((deprecated("__any""() is deprecated in favor of ""__any""_sync() and may be removed in a future release (Use -Wno-deprecated-declarations to suppress this warning)."))) int __any(int cond);
extern __attribute__((device)) __attribute__((device_builtin)) __attribute__((deprecated("__all""() is deprecated in favor of ""__all""_sync() and may be removed in a future release (Use -Wno-deprecated-declarations to suppress this warning)."))) int __all(int cond);
}
# 189 "/usr/local/cuda-11.7/include/device_atomic_functions.h" 3
// Forward declarations of the unsigned long long atomic overloads and of the
// bool-typed any() / all() helpers. any() and all() are marked deprecated with
// a message recommending the *_sync variants. Bodies are supplied further down
// in this translation unit.
static __inline__ __attribute__((device)) unsigned long long int atomicAdd(unsigned long long int *address, unsigned long long int val) ;

static __inline__ __attribute__((device)) unsigned long long int atomicExch(unsigned long long int *address, unsigned long long int val) ;

static __inline__ __attribute__((device)) unsigned long long int atomicCAS(unsigned long long int *address, unsigned long long int compare, unsigned long long int val) ;

static __inline__ __attribute__((device)) __attribute__((deprecated("__any""() is deprecated in favor of ""__any""_sync() and may be removed in a future release (Use -Wno-deprecated-declarations to suppress this warning)."))) bool any(bool cond) ;

static __inline__ __attribute__((device)) __attribute__((deprecated("__all""() is deprecated in favor of ""__all""_sync() and may be removed in a future release (Use -Wno-deprecated-declarations to suppress this warning)."))) bool all(bool cond) ;
# 208 "/usr/local/cuda-11.7/include/device_atomic_functions.h" 3
# 1 "/usr/local/cuda-11.7/include/device_atomic_functions.hpp" 1 3
# 75 "/usr/local/cuda-11.7/include/device_atomic_functions.hpp" 3
// atomicAdd(int*, int): forwards both arguments to the __iAtomicAdd builtin
// and returns whatever it returns.
static __inline__ __attribute__((device)) int atomicAdd(int *address, int val)
{
  const int result = __iAtomicAdd(address, val);
  return result;
}

// atomicAdd(unsigned int*, unsigned int): forwards to __uAtomicAdd.
static __inline__ __attribute__((device)) unsigned int atomicAdd(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicAdd(address, val);
  return result;
}

// atomicSub(int*, int): expressed as an atomic add of the negated operand.
// val is negated as an int, cast to unsigned int, and handed to __iAtomicAdd.
static __inline__ __attribute__((device)) int atomicSub(int *address, int val)
{
  const unsigned int negated = (unsigned int)-(int)val;
  return __iAtomicAdd(address, negated);
}

// atomicSub(unsigned int*, unsigned int): same negate-then-add scheme, using
// __uAtomicAdd.
static __inline__ __attribute__((device)) unsigned int atomicSub(unsigned int *address, unsigned int val)
{
  const unsigned int negated = (unsigned int)-(int)val;
  return __uAtomicAdd(address, negated);
}

// atomicExch(int*, int): forwards to the __iAtomicExch builtin.
static __inline__ __attribute__((device)) int atomicExch(int *address, int val)
{
  const int result = __iAtomicExch(address, val);
  return result;
}

// atomicExch(unsigned int*, unsigned int): forwards to __uAtomicExch.
static __inline__ __attribute__((device)) unsigned int atomicExch(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicExch(address, val);
  return result;
}

// atomicExch(float*, float): forwards to __fAtomicExch.
static __inline__ __attribute__((device)) float atomicExch(float *address, float val)
{
  const float result = __fAtomicExch(address, val);
  return result;
}

// atomicMin(int*, int): forwards to the __iAtomicMin builtin.
static __inline__ __attribute__((device)) int atomicMin(int *address, int val)
{
  const int result = __iAtomicMin(address, val);
  return result;
}

// atomicMin(unsigned int*, unsigned int): forwards to __uAtomicMin.
static __inline__ __attribute__((device)) unsigned int atomicMin(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicMin(address, val);
  return result;
}

// atomicMax(int*, int): forwards to the __iAtomicMax builtin.
static __inline__ __attribute__((device)) int atomicMax(int *address, int val)
{
  const int result = __iAtomicMax(address, val);
  return result;
}

// atomicMax(unsigned int*, unsigned int): forwards to __uAtomicMax.
static __inline__ __attribute__((device)) unsigned int atomicMax(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicMax(address, val);
  return result;
}

// atomicInc(unsigned int*, unsigned int): forwards to the __uAtomicInc builtin.
static __inline__ __attribute__((device)) unsigned int atomicInc(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicInc(address, val);
  return result;
}

// atomicDec(unsigned int*, unsigned int): forwards to the __uAtomicDec builtin.
static __inline__ __attribute__((device)) unsigned int atomicDec(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicDec(address, val);
  return result;
}

// atomicAnd(int*, int): forwards to the __iAtomicAnd builtin.
static __inline__ __attribute__((device)) int atomicAnd(int *address, int val)
{
  const int result = __iAtomicAnd(address, val);
  return result;
}

// atomicAnd(unsigned int*, unsigned int): forwards to __uAtomicAnd.
static __inline__ __attribute__((device)) unsigned int atomicAnd(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicAnd(address, val);
  return result;
}

// atomicOr(int*, int): forwards to the __iAtomicOr builtin.
static __inline__ __attribute__((device)) int atomicOr(int *address, int val)
{
  const int result = __iAtomicOr(address, val);
  return result;
}

// atomicOr(unsigned int*, unsigned int): forwards to __uAtomicOr.
static __inline__ __attribute__((device)) unsigned int atomicOr(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicOr(address, val);
  return result;
}

// atomicXor(int*, int): forwards to the __iAtomicXor builtin.
static __inline__ __attribute__((device)) int atomicXor(int *address, int val)
{
  const int result = __iAtomicXor(address, val);
  return result;
}

// atomicXor(unsigned int*, unsigned int): forwards to __uAtomicXor.
static __inline__ __attribute__((device)) unsigned int atomicXor(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicXor(address, val);
  return result;
}

// atomicCAS(int*, int, int): forwards address, compare and val, in that order,
// to the __iAtomicCAS builtin.
static __inline__ __attribute__((device)) int atomicCAS(int *address, int compare, int val)
{
  const int result = __iAtomicCAS(address, compare, val);
  return result;
}

// atomicCAS(unsigned int*, unsigned int, unsigned int): forwards to
// __uAtomicCAS.
static __inline__ __attribute__((device)) unsigned int atomicCAS(unsigned int *address, unsigned int compare, unsigned int val)
{
  const unsigned int result = __uAtomicCAS(address, compare, val);
  return result;
}
# 194 "/usr/local/cuda-11.7/include/device_atomic_functions.hpp" 3
// atomicAdd(unsigned long long*, unsigned long long): forwards to the
// __ullAtomicAdd builtin.
static __inline__ __attribute__((device)) unsigned long long int atomicAdd(unsigned long long int *address, unsigned long long int val)
{
  const unsigned long long int result = __ullAtomicAdd(address, val);
  return result;
}

// atomicExch(unsigned long long*, unsigned long long): forwards to the
// __ullAtomicExch builtin.
static __inline__ __attribute__((device)) unsigned long long int atomicExch(unsigned long long int *address, unsigned long long int val)
{
  const unsigned long long int result = __ullAtomicExch(address, val);
  return result;
}

// atomicCAS(unsigned long long*, ...): forwards address, compare and val, in
// that order, to the __ullAtomicCAS builtin.
static __inline__ __attribute__((device)) unsigned long long int atomicCAS(unsigned long long int *address, unsigned long long int compare, unsigned long long int val)
{
  const unsigned long long int result = __ullAtomicCAS(address, compare, val);
  return result;
}

// bool-typed front end for __any: the condition is cast to int for the call
// and the int result is cast back to bool.
static __inline__ __attribute__((device)) bool any(bool cond)
{
  const bool result = (bool)__any((int)cond);
  return result;
}

// bool-typed front end for __all: the condition is cast to int for the call
// and the int result is cast back to bool.
static __inline__ __attribute__((device)) bool all(bool cond)
{
  const bool result = (bool)__all((int)cond);
  return result;
}
# 209 "/usr/local/cuda-11.7/include/device_atomic_functions.h" 2 3
# 261 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3


# 1 "/usr/local/cuda-11.7/include/crt/device_functions.hpp" 1 3
# 79 "/usr/local/cuda-11.7/include/crt/device_functions.hpp" 3
# 1 "/usr/local/cuda-11.7/include/builtin_types.h" 1 3
# 80 "/usr/local/cuda-11.7/include/crt/device_functions.hpp" 2 3

# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 82 "/usr/local/cuda-11.7/include/crt/device_functions.hpp" 2 3


// mulhi(int, int): forwards to __mulhi.
static __inline__ __attribute__((device)) int mulhi(const int a, const int b)
{
  const int result = __mulhi(a, b);
  return result;
}

// mulhi(unsigned int, unsigned int): forwards to __umulhi.
static __inline__ __attribute__((device)) unsigned int mulhi(const unsigned int a, const unsigned int b)
{
  const unsigned int result = __umulhi(a, b);
  return result;
}

// mulhi(int, unsigned int): the signed operand is converted to unsigned int
// first, then both go to __umulhi.
static __inline__ __attribute__((device)) unsigned int mulhi(const int a, const unsigned int b)
{
  const unsigned int lhs = static_cast<unsigned int>(a);
  return __umulhi(lhs, b);
}

// mulhi(unsigned int, int): the signed operand is converted to unsigned int
// first, then both go to __umulhi.
static __inline__ __attribute__((device)) unsigned int mulhi(const unsigned int a, const int b)
{
  const unsigned int rhs = static_cast<unsigned int>(b);
  return __umulhi(a, rhs);
}

// mul64hi(long long, long long): forwards to __mul64hi.
static __inline__ __attribute__((device)) long long int mul64hi(const long long int a, const long long int b)
{
  const long long int result = __mul64hi(a, b);
  return result;
}

// mul64hi(unsigned long long, unsigned long long): forwards to __umul64hi.
static __inline__ __attribute__((device)) unsigned long long int mul64hi(const unsigned long long int a, const unsigned long long int b)
{
  const unsigned long long int result = __umul64hi(a, b);
  return result;
}

// mul64hi(long long, unsigned long long): the signed operand is converted to
// unsigned long long first, then both go to __umul64hi.
static __inline__ __attribute__((device)) unsigned long long int mul64hi(const long long int a, const unsigned long long int b)
{
  const unsigned long long int lhs = static_cast<unsigned long long int>(a);
  return __umul64hi(lhs, b);
}

// mul64hi(unsigned long long, long long): the signed operand is converted to
// unsigned long long first, then both go to __umul64hi.
static __inline__ __attribute__((device)) unsigned long long int mul64hi(const unsigned long long int a, const long long int b)
{
  const unsigned long long int rhs = static_cast<unsigned long long int>(b);
  return __umul64hi(a, rhs);
}

// Un-prefixed aliases for the __float_as_int / __int_as_float /
// __float_as_uint / __uint_as_float intrinsics. Each alias forwards its single
// argument unchanged.

// float_as_int -> __float_as_int
static __inline__ __attribute__((device)) int float_as_int(const float a)
{
  const int result = __float_as_int(a);
  return result;
}

// int_as_float -> __int_as_float
static __inline__ __attribute__((device)) float int_as_float(const int a)
{
  const float result = __int_as_float(a);
  return result;
}

// float_as_uint -> __float_as_uint
static __inline__ __attribute__((device)) unsigned int float_as_uint(const float a)
{
  const unsigned int result = __float_as_uint(a);
  return result;
}

// uint_as_float -> __uint_as_float
static __inline__ __attribute__((device)) float uint_as_float(const unsigned int a)
{
  const float result = __uint_as_float(a);
  return result;
}
// saturate(float): forwards to the __saturatef intrinsic.
static __inline__ __attribute__((device)) float saturate(const float a)
{
  const float result = __saturatef(a);
  return result;
}

// mul24(int, int): forwards to the __mul24 intrinsic.
static __inline__ __attribute__((device)) int mul24(const int a, const int b)
{
  const int result = __mul24(a, b);
  return result;
}

// umul24(unsigned int, unsigned int): forwards to the __umul24 intrinsic.
static __inline__ __attribute__((device)) unsigned int umul24(const unsigned int a, const unsigned int b)
{
  const unsigned int result = __umul24(a, b);
  return result;
}

// float -> int conversion with a caller-selected rounding mode.
// Dispatch: cudaRoundNearest -> __float2int_rn, cudaRoundPosInf ->
// __float2int_ru, cudaRoundMinInf -> __float2int_rd; any other mode value
// falls through to __float2int_rz.
static __inline__ __attribute__((device)) int float2int(const float a, const enum cudaRoundMode mode)
{
  if (mode == cudaRoundNearest) {
    return __float2int_rn(a);
  }
  if (mode == cudaRoundPosInf) {
    return __float2int_ru(a);
  }
  if (mode == cudaRoundMinInf) {
    return __float2int_rd(a);
  }
  return __float2int_rz(a);
}

// float -> unsigned int conversion with a caller-selected rounding mode.
// Dispatch: cudaRoundNearest -> __float2uint_rn, cudaRoundPosInf ->
// __float2uint_ru, cudaRoundMinInf -> __float2uint_rd; any other mode value
// falls through to __float2uint_rz.
static __inline__ __attribute__((device)) unsigned int float2uint(const float a, const enum cudaRoundMode mode)
{
  if (mode == cudaRoundNearest) {
    return __float2uint_rn(a);
  }
  if (mode == cudaRoundPosInf) {
    return __float2uint_ru(a);
  }
  if (mode == cudaRoundMinInf) {
    return __float2uint_rd(a);
  }
  return __float2uint_rz(a);
}

// int -> float conversion with a caller-selected rounding mode.
// Dispatch: cudaRoundZero -> __int2float_rz, cudaRoundPosInf ->
// __int2float_ru, cudaRoundMinInf -> __int2float_rd; any other mode value
// falls through to __int2float_rn.
static __inline__ __attribute__((device)) float int2float(const int a, const enum cudaRoundMode mode)
{
  if (mode == cudaRoundZero) {
    return __int2float_rz(a);
  }
  if (mode == cudaRoundPosInf) {
    return __int2float_ru(a);
  }
  if (mode == cudaRoundMinInf) {
    return __int2float_rd(a);
  }
  return __int2float_rn(a);
}

// unsigned int -> float conversion with a caller-selected rounding mode.
// Dispatch: cudaRoundZero -> __uint2float_rz, cudaRoundPosInf ->
// __uint2float_ru, cudaRoundMinInf -> __uint2float_rd; any other mode value
// falls through to __uint2float_rn.
static __inline__ __attribute__((device)) float uint2float(const unsigned int a, const enum cudaRoundMode mode)
{
  if (mode == cudaRoundZero) {
    return __uint2float_rz(a);
  }
  if (mode == cudaRoundPosInf) {
    return __uint2float_ru(a);
  }
  if (mode == cudaRoundMinInf) {
    return __uint2float_rd(a);
  }
  return __uint2float_rn(a);
}
# 266 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 1 "/usr/local/cuda-11.7/include/crt/device_double_functions.hpp" 1 3
# 83 "/usr/local/cuda-11.7/include/crt/device_double_functions.hpp" 3
# 1 "/usr/local/cuda-11.7/include/builtin_types.h" 1 3
# 84 "/usr/local/cuda-11.7/include/crt/device_double_functions.hpp" 2 3

# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 86 "/usr/local/cuda-11.7/include/crt/device_double_functions.hpp" 2 3


// Four-argument fma overload that picks the intrinsic from the rounding mode.
// Dispatch: cudaRoundZero -> __fma_rz, cudaRoundPosInf -> __fma_ru,
// cudaRoundMinInf -> __fma_rd; any other mode value -> __fma_rn.
static __inline__ __attribute__((device)) double fma(double a, double b, double c, enum cudaRoundMode mode)
{
  if (mode == cudaRoundZero) {
    return __fma_rz(a, b, c);
  }
  if (mode == cudaRoundPosInf) {
    return __fma_ru(a, b, c);
  }
  if (mode == cudaRoundMinInf) {
    return __fma_rd(a, b, c);
  }
  return __fma_rn(a, b, c);
}

// Double multiply that picks the intrinsic from the rounding mode.
// Dispatch: cudaRoundZero -> __dmul_rz, cudaRoundPosInf -> __dmul_ru,
// cudaRoundMinInf -> __dmul_rd; any other mode value -> __dmul_rn.
static __inline__ __attribute__((device)) double dmul(double a, double b, enum cudaRoundMode mode)
{
  if (mode == cudaRoundZero) {
    return __dmul_rz(a, b);
  }
  if (mode == cudaRoundPosInf) {
    return __dmul_ru(a, b);
  }
  if (mode == cudaRoundMinInf) {
    return __dmul_rd(a, b);
  }
  return __dmul_rn(a, b);
}

// Double add that picks the intrinsic from the rounding mode.
// Dispatch: cudaRoundZero -> __dadd_rz, cudaRoundPosInf -> __dadd_ru,
// cudaRoundMinInf -> __dadd_rd; any other mode value -> __dadd_rn.
static __inline__ __attribute__((device)) double dadd(double a, double b, enum cudaRoundMode mode)
{
  if (mode == cudaRoundZero) {
    return __dadd_rz(a, b);
  }
  if (mode == cudaRoundPosInf) {
    return __dadd_ru(a, b);
  }
  if (mode == cudaRoundMinInf) {
    return __dadd_rd(a, b);
  }
  return __dadd_rn(a, b);
}

// Double subtract that picks the intrinsic from the rounding mode.
// Dispatch: cudaRoundZero -> __dsub_rz, cudaRoundPosInf -> __dsub_ru,
// cudaRoundMinInf -> __dsub_rd; any other mode value -> __dsub_rn.
static __inline__ __attribute__((device)) double dsub(double a, double b, enum cudaRoundMode mode)
{
  if (mode == cudaRoundZero) {
    return __dsub_rz(a, b);
  }
  if (mode == cudaRoundPosInf) {
    return __dsub_ru(a, b);
  }
  if (mode == cudaRoundMinInf) {
    return __dsub_rd(a, b);
  }
  return __dsub_rn(a, b);
}

// double -> int conversion with a caller-selected rounding mode.
// Dispatch: cudaRoundNearest -> __double2int_rn, cudaRoundPosInf ->
// __double2int_ru, cudaRoundMinInf -> __double2int_rd; any other mode value
// -> __double2int_rz.
static __inline__ __attribute__((device)) int double2int(double a, enum cudaRoundMode mode)
{
  if (mode == cudaRoundNearest) {
    return __double2int_rn(a);
  }
  if (mode == cudaRoundPosInf) {
    return __double2int_ru(a);
  }
  if (mode == cudaRoundMinInf) {
    return __double2int_rd(a);
  }
  return __double2int_rz(a);
}

// double -> unsigned int conversion with a caller-selected rounding mode.
// Dispatch: cudaRoundNearest -> __double2uint_rn, cudaRoundPosInf ->
// __double2uint_ru, cudaRoundMinInf -> __double2uint_rd; any other mode value
// -> __double2uint_rz.
static __inline__ __attribute__((device)) unsigned int double2uint(double a, enum cudaRoundMode mode)
{
  if (mode == cudaRoundNearest) {
    return __double2uint_rn(a);
  }
  if (mode == cudaRoundPosInf) {
    return __double2uint_ru(a);
  }
  if (mode == cudaRoundMinInf) {
    return __double2uint_rd(a);
  }
  return __double2uint_rz(a);
}

// double -> long long conversion with a caller-selected rounding mode.
// Dispatch: cudaRoundNearest -> __double2ll_rn, cudaRoundPosInf ->
// __double2ll_ru, cudaRoundMinInf -> __double2ll_rd; any other mode value
// -> __double2ll_rz.
static __inline__ __attribute__((device)) long long int double2ll(double a, enum cudaRoundMode mode)
{
  if (mode == cudaRoundNearest) {
    return __double2ll_rn(a);
  }
  if (mode == cudaRoundPosInf) {
    return __double2ll_ru(a);
  }
  if (mode == cudaRoundMinInf) {
    return __double2ll_rd(a);
  }
  return __double2ll_rz(a);
}

// double -> unsigned long long conversion with a caller-selected rounding
// mode. Dispatch: cudaRoundNearest -> __double2ull_rn, cudaRoundPosInf ->
// __double2ull_ru, cudaRoundMinInf -> __double2ull_rd; any other mode value
// -> __double2ull_rz.
static __inline__ __attribute__((device)) unsigned long long int double2ull(double a, enum cudaRoundMode mode)
{
  if (mode == cudaRoundNearest) {
    return __double2ull_rn(a);
  }
  if (mode == cudaRoundPosInf) {
    return __double2ull_ru(a);
  }
  if (mode == cudaRoundMinInf) {
    return __double2ull_rd(a);
  }
  return __double2ull_rz(a);
}

// long long -> double conversion with a caller-selected rounding mode.
// Dispatch: cudaRoundZero -> __ll2double_rz, cudaRoundPosInf ->
// __ll2double_ru, cudaRoundMinInf -> __ll2double_rd; any other mode value
// -> __ll2double_rn.
static __inline__ __attribute__((device)) double ll2double(long long int a, enum cudaRoundMode mode)
{
  if (mode == cudaRoundZero) {
    return __ll2double_rz(a);
  }
  if (mode == cudaRoundPosInf) {
    return __ll2double_ru(a);
  }
  if (mode == cudaRoundMinInf) {
    return __ll2double_rd(a);
  }
  return __ll2double_rn(a);
}

// unsigned long long -> double conversion with a caller-selected rounding
// mode. Dispatch: cudaRoundZero -> __ull2double_rz, cudaRoundPosInf ->
// __ull2double_ru, cudaRoundMinInf -> __ull2double_rd; any other mode value
// -> __ull2double_rn.
static __inline__ __attribute__((device)) double ull2double(unsigned long long int a, enum cudaRoundMode mode)
{
  if (mode == cudaRoundZero) {
    return __ull2double_rz(a);
  }
  if (mode == cudaRoundPosInf) {
    return __ull2double_ru(a);
  }
  if (mode == cudaRoundMinInf) {
    return __ull2double_rd(a);
  }
  return __ull2double_rn(a);
}

// int -> double conversion. The mode parameter is accepted but never read;
// the value is converted with a plain cast.
static __inline__ __attribute__((device)) double int2double(int a, enum cudaRoundMode mode)
{
  const double converted = static_cast<double>(a);
  return converted;
}

// unsigned int -> double conversion. The mode parameter is accepted but never
// read; the value is converted with a plain cast.
static __inline__ __attribute__((device)) double uint2double(unsigned int a, enum cudaRoundMode mode)
{
  const double converted = static_cast<double>(a);
  return converted;
}

// float -> double conversion. The mode parameter is accepted but never read;
// the value is converted with a plain cast.
static __inline__ __attribute__((device)) double float2double(float a, enum cudaRoundMode mode)
{
  const double converted = static_cast<double>(a);
  return converted;
}
# 267 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3


# 1 "/usr/local/cuda-11.7/include/sm_20_atomic_functions.hpp" 1 3
# 75 "/usr/local/cuda-11.7/include/sm_20_atomic_functions.hpp" 3
// atomicAdd(float*, float): forwards to __fAtomicAdd.
static __inline__ __attribute__((device)) float atomicAdd(float *address, float val)
{
  const float result = __fAtomicAdd(address, val);
  return result;
}
# 274 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 286 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/local/cuda-11.7/include/sm_20_intrinsics.hpp" 1 3
# 75 "/usr/local/cuda-11.7/include/sm_20_intrinsics.hpp" 3
// bool-typed front end for __ballot: the predicate is cast to int and the
// call's result is returned as unsigned int.
static __inline__ __attribute__((device)) unsigned int ballot(bool pred)
{
  const unsigned int result = __ballot((int)pred);
  return result;
}

// bool-typed front ends for the __syncthreads_count / _and / _or intrinsics.
// In every case the predicate is cast to int before the call.

// syncthreads_count -> __syncthreads_count; the int result is returned as is.
static __inline__ __attribute__((device)) int syncthreads_count(bool pred)
{
  const int result = __syncthreads_count((int)pred);
  return result;
}

// syncthreads_and -> __syncthreads_and; the result is cast to bool.
static __inline__ __attribute__((device)) bool syncthreads_and(bool pred)
{
  const bool result = (bool)__syncthreads_and((int)pred);
  return result;
}

// syncthreads_or -> __syncthreads_or; the result is cast to bool.
static __inline__ __attribute__((device)) bool syncthreads_or(bool pred)
{
  const bool result = (bool)__syncthreads_or((int)pred);
  return result;
}


// C-linkage declarations of the device-side __nv_is*_impl predicates. Each
// takes a const void* and returns unsigned.
extern "C" {
  __attribute__((device)) unsigned __nv_isGlobal_impl(const void *);
  __attribute__((device)) unsigned __nv_isShared_impl(const void *);
  __attribute__((device)) unsigned __nv_isConstant_impl(const void *);
  __attribute__((device)) unsigned __nv_isLocal_impl(const void *);
  __attribute__((device)) unsigned __nv_isGridConstant_impl(const void *);
}

// __ignored_cuda___is* wrappers: each forwards its pointer to the matching
// __nv_is*_impl function and returns the result.

// -> __nv_isGlobal_impl
static __inline__ __attribute__((device)) unsigned int __ignored_cuda___isGlobal(const void *ptr)
{
  const unsigned int result = __nv_isGlobal_impl(ptr);
  return result;
}

// -> __nv_isShared_impl
static __inline__ __attribute__((device)) unsigned int __ignored_cuda___isShared(const void *ptr)
{
  const unsigned int result = __nv_isShared_impl(ptr);
  return result;
}

// -> __nv_isConstant_impl
static __inline__ __attribute__((device)) unsigned int __ignored_cuda___isConstant(const void *ptr)
{
  const unsigned int result = __nv_isConstant_impl(ptr);
  return result;
}

// -> __nv_isLocal_impl
static __inline__ __attribute__((device)) unsigned int __ignored_cuda___isLocal(const void *ptr)
{
  const unsigned int result = __nv_isLocal_impl(ptr);
  return result;
}
# 131 "/usr/local/cuda-11.7/include/sm_20_intrinsics.hpp" 3
// C-linkage declarations of the device-side __nv_cvta_*_impl functions.
// The generic_to_* forms take a const void* and return size_t; the
// *_to_generic forms take a size_t and return void*.
extern "C" {
  __attribute__((device)) size_t __nv_cvta_generic_to_global_impl(const void *);
  __attribute__((device)) size_t __nv_cvta_generic_to_shared_impl(const void *);
  __attribute__((device)) size_t __nv_cvta_generic_to_constant_impl(const void *);
  __attribute__((device)) size_t __nv_cvta_generic_to_local_impl(const void *);
  __attribute__((device)) void * __nv_cvta_global_to_generic_impl(size_t);
  __attribute__((device)) void * __nv_cvta_shared_to_generic_impl(size_t);
  __attribute__((device)) void * __nv_cvta_constant_to_generic_impl(size_t);
  __attribute__((device)) void * __nv_cvta_local_to_generic_impl(size_t);
}

// __cvta_generic_to_* wrappers: each passes the pointer to the matching
// __nv_cvta_generic_to_*_impl function and returns its size_t result.

// -> __nv_cvta_generic_to_global_impl
static __inline__ __attribute__((device)) size_t __cvta_generic_to_global(const void *p)
{
  const size_t bits = __nv_cvta_generic_to_global_impl(p);
  return bits;
}

// -> __nv_cvta_generic_to_shared_impl
static __inline__ __attribute__((device)) size_t __cvta_generic_to_shared(const void *p)
{
  const size_t bits = __nv_cvta_generic_to_shared_impl(p);
  return bits;
}

// -> __nv_cvta_generic_to_constant_impl
static __inline__ __attribute__((device)) size_t __cvta_generic_to_constant(const void *p)
{
  const size_t bits = __nv_cvta_generic_to_constant_impl(p);
  return bits;
}

// -> __nv_cvta_generic_to_local_impl
static __inline__ __attribute__((device)) size_t __cvta_generic_to_local(const void *p)
{
  const size_t bits = __nv_cvta_generic_to_local_impl(p);
  return bits;
}

// __cvta_*_to_generic wrappers: each passes the size_t value to the matching
// __nv_cvta_*_to_generic_impl function and returns its void* result.

// -> __nv_cvta_global_to_generic_impl
static __inline__ __attribute__((device)) void * __cvta_global_to_generic(size_t rawbits)
{
  void * const generic = __nv_cvta_global_to_generic_impl(rawbits);
  return generic;
}

// -> __nv_cvta_shared_to_generic_impl
static __inline__ __attribute__((device)) void * __cvta_shared_to_generic(size_t rawbits)
{
  void * const generic = __nv_cvta_shared_to_generic_impl(rawbits);
  return generic;
}

// -> __nv_cvta_constant_to_generic_impl
static __inline__ __attribute__((device)) void * __cvta_constant_to_generic(size_t rawbits)
{
  void * const generic = __nv_cvta_constant_to_generic_impl(rawbits);
  return generic;
}

// -> __nv_cvta_local_to_generic_impl
static __inline__ __attribute__((device)) void * __cvta_local_to_generic(size_t rawbits)
{
  void * const generic = __nv_cvta_local_to_generic_impl(rawbits);
  return generic;
}
# 287 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3


// __isGlobal / __isShared / __isConstant / __isLocal: always-inline,
// const-attributed wrappers that hand the pointer to the corresponding
// __nvvm_isspacep_* builtin and return the result as unsigned int.

// -> __nvvm_isspacep_global
static __attribute__((device)) __inline__ __attribute__((always_inline)) __attribute__((const)) unsigned int __isGlobal(const void *p)
{
  const unsigned int result = __nvvm_isspacep_global(p);
  return result;
}

// -> __nvvm_isspacep_shared
static __attribute__((device)) __inline__ __attribute__((always_inline)) __attribute__((const)) unsigned int __isShared(const void *p)
{
  const unsigned int result = __nvvm_isspacep_shared(p);
  return result;
}

// -> __nvvm_isspacep_const
static __attribute__((device)) __inline__ __attribute__((always_inline)) __attribute__((const)) unsigned int __isConstant(const void *p)
{
  const unsigned int result = __nvvm_isspacep_const(p);
  return result;
}

// -> __nvvm_isspacep_local
static __attribute__((device)) __inline__ __attribute__((always_inline)) __attribute__((const)) unsigned int __isLocal(const void *p)
{
  const unsigned int result = __nvvm_isspacep_local(p);
  return result;
}

# 1 "/usr/local/cuda-11.7/include/sm_32_atomic_functions.hpp" 1 3
# 77 "/usr/local/cuda-11.7/include/sm_32_atomic_functions.hpp" 3
// long long overloads of atomicMin / atomicMax / atomicAnd / atomicOr /
// atomicXor. Each forwards (address, val) to the intrinsic named in its
// comment and returns the intrinsic's result.

// -> __illAtomicMin
static __inline__ __attribute__((device)) long long atomicMin(long long *address, long long val)
{
    const long long result = __illAtomicMin(address, val);
    return result;
}

// -> __illAtomicMax
static __inline__ __attribute__((device)) long long atomicMax(long long *address, long long val)
{
    const long long result = __illAtomicMax(address, val);
    return result;
}

// -> __llAtomicAnd
static __inline__ __attribute__((device)) long long atomicAnd(long long *address, long long val)
{
    const long long result = __llAtomicAnd(address, val);
    return result;
}

// -> __llAtomicOr
static __inline__ __attribute__((device)) long long atomicOr(long long *address, long long val)
{
    const long long result = __llAtomicOr(address, val);
    return result;
}

// -> __llAtomicXor
static __inline__ __attribute__((device)) long long atomicXor(long long *address, long long val)
{
    const long long result = __llAtomicXor(address, val);
    return result;
}

// unsigned long long overloads of atomicMin / atomicMax / atomicAnd /
// atomicOr / atomicXor. Each forwards (address, val) to the intrinsic named in
// its comment and returns the intrinsic's result.

// -> __ullAtomicMin
static __inline__ __attribute__((device)) unsigned long long atomicMin(unsigned long long *address, unsigned long long val)
{
    const unsigned long long result = __ullAtomicMin(address, val);
    return result;
}

// -> __ullAtomicMax
static __inline__ __attribute__((device)) unsigned long long atomicMax(unsigned long long *address, unsigned long long val)
{
    const unsigned long long result = __ullAtomicMax(address, val);
    return result;
}

// -> __ullAtomicAnd
static __inline__ __attribute__((device)) unsigned long long atomicAnd(unsigned long long *address, unsigned long long val)
{
    const unsigned long long result = __ullAtomicAnd(address, val);
    return result;
}

// -> __ullAtomicOr
static __inline__ __attribute__((device)) unsigned long long atomicOr(unsigned long long *address, unsigned long long val)
{
    const unsigned long long result = __ullAtomicOr(address, val);
    return result;
}

// -> __ullAtomicXor
static __inline__ __attribute__((device)) unsigned long long atomicXor(unsigned long long *address, unsigned long long val)
{
    const unsigned long long result = __ullAtomicXor(address, val);
    return result;
}
# 307 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 319 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/local/cuda-11.7/include/sm_60_atomic_functions.hpp" 1 3
# 77 "/usr/local/cuda-11.7/include/sm_60_atomic_functions.hpp" 3
// atomicAdd(double*, double): forwards to __dAtomicAdd.
static __inline__ __attribute__((device)) double atomicAdd(double *address, double val)
{
  const double result = __dAtomicAdd(address, val);
  return result;
}

// atomicAdd_block / atomicAdd_system overloads for int, unsigned int,
// unsigned long long, float and double. Every overload forwards
// (address, val) to the identically-suffixed __*AtomicAdd_{block,system}
// intrinsic and returns its result.

static __inline__ __attribute__((device)) int atomicAdd_block(int *address, int val)
{
  const int result = __iAtomicAdd_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) int atomicAdd_system(int *address, int val)
{
  const int result = __iAtomicAdd_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned int atomicAdd_block(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicAdd_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned int atomicAdd_system(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicAdd_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned long long atomicAdd_block(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicAdd_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned long long atomicAdd_system(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicAdd_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) float atomicAdd_block(float *address, float val)
{
  const float result = __fAtomicAdd_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) float atomicAdd_system(float *address, float val)
{
  const float result = __fAtomicAdd_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) double atomicAdd_block(double *address, double val)
{
  const double result = __dAtomicAdd_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) double atomicAdd_system(double *address, double val)
{
  const double result = __dAtomicAdd_system(address, val);
  return result;
}

// atomicSub_block / atomicSub_system overloads for int and unsigned int.
// Subtraction is expressed as an add of the negated operand: val is negated as
// an int, cast to unsigned int, and passed to the matching
// __*AtomicAdd_{block,system} intrinsic.

static __inline__ __attribute__((device)) int atomicSub_block(int *address, int val)
{
  const unsigned int negated = (unsigned int)-(int)val;
  return __iAtomicAdd_block(address, negated);
}

static __inline__ __attribute__((device)) int atomicSub_system(int *address, int val)
{
  const unsigned int negated = (unsigned int)-(int)val;
  return __iAtomicAdd_system(address, negated);
}

static __inline__ __attribute__((device)) unsigned int atomicSub_block(unsigned int *address, unsigned int val)
{
  const unsigned int negated = (unsigned int)-(int)val;
  return __uAtomicAdd_block(address, negated);
}

static __inline__ __attribute__((device)) unsigned int atomicSub_system(unsigned int *address, unsigned int val)
{
  const unsigned int negated = (unsigned int)-(int)val;
  return __uAtomicAdd_system(address, negated);
}

// atomicExch_block / atomicExch_system overloads for int, unsigned int,
// unsigned long long and float. Every overload forwards (address, val) to the
// identically-suffixed __*AtomicExch_{block,system} intrinsic.

static __inline__ __attribute__((device)) int atomicExch_block(int *address, int val)
{
  const int result = __iAtomicExch_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) int atomicExch_system(int *address, int val)
{
  const int result = __iAtomicExch_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned int atomicExch_block(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicExch_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned int atomicExch_system(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicExch_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned long long atomicExch_block(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicExch_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned long long atomicExch_system(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicExch_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) float atomicExch_block(float *address, float val)
{
  const float result = __fAtomicExch_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) float atomicExch_system(float *address, float val)
{
  const float result = __fAtomicExch_system(address, val);
  return result;
}

// atomicMin_block / atomicMin_system overloads for int, long long,
// unsigned int and unsigned long long. Every overload forwards (address, val)
// to the identically-suffixed __*AtomicMin_{block,system} intrinsic.

static __inline__ __attribute__((device)) int atomicMin_block(int *address, int val)
{
  const int result = __iAtomicMin_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) int atomicMin_system(int *address, int val)
{
  const int result = __iAtomicMin_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) long long atomicMin_block(long long *address, long long val)
{
  const long long result = __illAtomicMin_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) long long atomicMin_system(long long *address, long long val)
{
  const long long result = __illAtomicMin_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned int atomicMin_block(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicMin_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned int atomicMin_system(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicMin_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned long long atomicMin_block(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicMin_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned long long atomicMin_system(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicMin_system(address, val);
  return result;
}

// atomicMax_block / atomicMax_system overloads for int, long long,
// unsigned int and unsigned long long. Every overload forwards (address, val)
// to the identically-suffixed __*AtomicMax_{block,system} intrinsic.

static __inline__ __attribute__((device)) int atomicMax_block(int *address, int val)
{
  const int result = __iAtomicMax_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) int atomicMax_system(int *address, int val)
{
  const int result = __iAtomicMax_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) long long atomicMax_block(long long *address, long long val)
{
  const long long result = __illAtomicMax_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) long long atomicMax_system(long long *address, long long val)
{
  const long long result = __illAtomicMax_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned int atomicMax_block(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicMax_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned int atomicMax_system(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicMax_system(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned long long atomicMax_block(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicMax_block(address, val);
  return result;
}

static __inline__ __attribute__((device)) unsigned long long atomicMax_system(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicMax_system(address, val);
  return result;
}

// Forwards both arguments to __uAtomicInc_block and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicInc_block(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicInc_block(address, val);
  return result;
}

// Forwards both arguments to __uAtomicInc_system and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicInc_system(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicInc_system(address, val);
  return result;
}

// Forwards both arguments to __uAtomicDec_block and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicDec_block(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicDec_block(address, val);
  return result;
}

// Forwards both arguments to __uAtomicDec_system and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicDec_system(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicDec_system(address, val);
  return result;
}

// Forwards address, compare and val to __iAtomicCAS_block and returns its result.
static __inline__ __attribute__((device)) int
atomicCAS_block(int *address, int compare, int val)
{
  const int result = __iAtomicCAS_block(address, compare, val);
  return result;
}

// Forwards address, compare and val to __iAtomicCAS_system and returns its result.
static __inline__ __attribute__((device)) int
atomicCAS_system(int *address, int compare, int val)
{
  const int result = __iAtomicCAS_system(address, compare, val);
  return result;
}

// Forwards address, compare and val to __uAtomicCAS_block and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicCAS_block(unsigned int *address, unsigned int compare, unsigned int val)
{
  const unsigned int result = __uAtomicCAS_block(address, compare, val);
  return result;
}

// Forwards address, compare and val to __uAtomicCAS_system and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicCAS_system(unsigned int *address, unsigned int compare, unsigned int val)
{
  const unsigned int result = __uAtomicCAS_system(address, compare, val);
  return result;
}

// Forwards address, compare and val to __ullAtomicCAS_block and returns its result.
static __inline__ __attribute__((device)) unsigned long long int
atomicCAS_block(unsigned long long int *address,
                unsigned long long int compare,
                unsigned long long int val)
{
  const unsigned long long int result = __ullAtomicCAS_block(address, compare, val);
  return result;
}

// Forwards address, compare and val to __ullAtomicCAS_system and returns its result.
static __inline__ __attribute__((device)) unsigned long long int
atomicCAS_system(unsigned long long int *address,
                 unsigned long long int compare,
                 unsigned long long int val)
{
  const unsigned long long int result = __ullAtomicCAS_system(address, compare, val);
  return result;
}

// Forwards both arguments to __iAtomicAnd_block and returns its result.
static __inline__ __attribute__((device)) int
atomicAnd_block(int *address, int val)
{
  const int result = __iAtomicAnd_block(address, val);
  return result;
}

// Forwards both arguments to __iAtomicAnd_system and returns its result.
static __inline__ __attribute__((device)) int
atomicAnd_system(int *address, int val)
{
  const int result = __iAtomicAnd_system(address, val);
  return result;
}

// Forwards both arguments to __llAtomicAnd_block and returns its result.
static __inline__ __attribute__((device)) long long
atomicAnd_block(long long *address, long long val)
{
  const long long result = __llAtomicAnd_block(address, val);
  return result;
}

// Forwards both arguments to __llAtomicAnd_system and returns its result.
static __inline__ __attribute__((device)) long long
atomicAnd_system(long long *address, long long val)
{
  const long long result = __llAtomicAnd_system(address, val);
  return result;
}

// Forwards both arguments to __uAtomicAnd_block and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicAnd_block(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicAnd_block(address, val);
  return result;
}

// Forwards both arguments to __uAtomicAnd_system and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicAnd_system(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicAnd_system(address, val);
  return result;
}

// Forwards both arguments to __ullAtomicAnd_block and returns its result.
static __inline__ __attribute__((device)) unsigned long long
atomicAnd_block(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicAnd_block(address, val);
  return result;
}

// Forwards both arguments to __ullAtomicAnd_system and returns its result.
static __inline__ __attribute__((device)) unsigned long long
atomicAnd_system(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicAnd_system(address, val);
  return result;
}

// Forwards both arguments to __iAtomicOr_block and returns its result.
static __inline__ __attribute__((device)) int
atomicOr_block(int *address, int val)
{
  const int result = __iAtomicOr_block(address, val);
  return result;
}

// Forwards both arguments to __iAtomicOr_system and returns its result.
static __inline__ __attribute__((device)) int
atomicOr_system(int *address, int val)
{
  const int result = __iAtomicOr_system(address, val);
  return result;
}

// Forwards both arguments to __llAtomicOr_block and returns its result.
static __inline__ __attribute__((device)) long long
atomicOr_block(long long *address, long long val)
{
  const long long result = __llAtomicOr_block(address, val);
  return result;
}

// Forwards both arguments to __llAtomicOr_system and returns its result.
static __inline__ __attribute__((device)) long long
atomicOr_system(long long *address, long long val)
{
  const long long result = __llAtomicOr_system(address, val);
  return result;
}

// Forwards both arguments to __uAtomicOr_block and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicOr_block(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicOr_block(address, val);
  return result;
}

// Forwards both arguments to __uAtomicOr_system and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicOr_system(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicOr_system(address, val);
  return result;
}

// Forwards both arguments to __ullAtomicOr_block and returns its result.
static __inline__ __attribute__((device)) unsigned long long
atomicOr_block(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicOr_block(address, val);
  return result;
}

// Forwards both arguments to __ullAtomicOr_system and returns its result.
static __inline__ __attribute__((device)) unsigned long long
atomicOr_system(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicOr_system(address, val);
  return result;
}

// Forwards both arguments to __iAtomicXor_block and returns its result.
static __inline__ __attribute__((device)) int
atomicXor_block(int *address, int val)
{
  const int result = __iAtomicXor_block(address, val);
  return result;
}

// Forwards both arguments to __iAtomicXor_system and returns its result.
static __inline__ __attribute__((device)) int
atomicXor_system(int *address, int val)
{
  const int result = __iAtomicXor_system(address, val);
  return result;
}

// Forwards both arguments to __llAtomicXor_block and returns its result.
static __inline__ __attribute__((device)) long long
atomicXor_block(long long *address, long long val)
{
  const long long result = __llAtomicXor_block(address, val);
  return result;
}

// Forwards both arguments to __llAtomicXor_system and returns its result.
static __inline__ __attribute__((device)) long long
atomicXor_system(long long *address, long long val)
{
  const long long result = __llAtomicXor_system(address, val);
  return result;
}

// Forwards both arguments to __uAtomicXor_block and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicXor_block(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicXor_block(address, val);
  return result;
}

// Forwards both arguments to __uAtomicXor_system and returns its result.
static __inline__ __attribute__((device)) unsigned int
atomicXor_system(unsigned int *address, unsigned int val)
{
  const unsigned int result = __uAtomicXor_system(address, val);
  return result;
}

// Forwards both arguments to __ullAtomicXor_block and returns its result.
static __inline__ __attribute__((device)) unsigned long long
atomicXor_block(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicXor_block(address, val);
  return result;
}

// Forwards both arguments to __ullAtomicXor_system and returns its result.
static __inline__ __attribute__((device)) unsigned long long
atomicXor_system(unsigned long long *address, unsigned long long val)
{
  const unsigned long long result = __ullAtomicXor_system(address, val);
  return result;
}
# 320 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 1 "/usr/local/cuda-11.7/include/sm_61_intrinsics.hpp" 1 3
# 79 "/usr/local/cuda-11.7/include/sm_61_intrinsics.hpp" 3
// Issues the PTX instruction dp4a.s32.s32 with srcA, srcB and c as inputs
// and returns the register value the instruction writes.
static __attribute__((device)) __inline__ int __dp4a(int srcA, int srcB, int c) {
    int result;
    asm volatile ("dp4a.s32.s32 %0, %1, %2, %3;" : "=r"(result) : "r"(srcA), "r"(srcB), "r"(c));
    return result;
}

// Issues the PTX instruction dp4a.u32.u32 with srcA, srcB and c as inputs
// and returns the register value the instruction writes.
static __attribute__((device)) __inline__ unsigned int __dp4a(unsigned int srcA, unsigned int srcB, unsigned int c) {
    unsigned int result;
    asm volatile ("dp4a.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(srcA), "r"(srcB), "r"(c));
    return result;
}

// char4 overload: both vectors are reinterpreted as int through a pointer
// cast and passed, with c, to the PTX instruction dp4a.s32.s32.
static __attribute__((device)) __inline__ int __dp4a(char4 srcA, char4 srcB, int c) {
    const int packedA = *(int *)&srcA;
    const int packedB = *(int *)&srcB;
    int result;
    asm volatile ("dp4a.s32.s32 %0, %1, %2, %3;" : "=r"(result) : "r"(packedA), "r"(packedB), "r"(c));
    return result;
}

// uchar4 overload: both vectors are reinterpreted as unsigned int through a
// pointer cast and passed, with c, to the PTX instruction dp4a.u32.u32.
static __attribute__((device)) __inline__ unsigned int __dp4a(uchar4 srcA, uchar4 srcB, unsigned int c) {
    const unsigned int packedA = *(unsigned int *)&srcA;
    const unsigned int packedB = *(unsigned int *)&srcB;
    unsigned int result;
    asm volatile ("dp4a.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(packedA), "r"(packedB), "r"(c));
    return result;
}


// Issues the PTX instruction dp2a.lo.s32.s32 with srcA, srcB and c as inputs
// and returns the register value the instruction writes.
static __attribute__((device)) __inline__ int __dp2a_lo(int srcA, int srcB, int c) {
    int result;
    asm volatile ("dp2a.lo.s32.s32 %0, %1, %2, %3;" : "=r"(result) : "r"(srcA), "r"(srcB), "r"(c));
    return result;
}

// Issues the PTX instruction dp2a.lo.u32.u32 with srcA, srcB and c as inputs
// and returns the register value the instruction writes.
static __attribute__((device)) __inline__ unsigned int __dp2a_lo(unsigned int srcA, unsigned int srcB, unsigned int c) {
    unsigned int result;
    asm volatile ("dp2a.lo.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(srcA), "r"(srcB), "r"(c));
    return result;
}

// short2/char4 overload: both vectors are reinterpreted as int through a
// pointer cast and passed, with c, to the PTX instruction dp2a.lo.s32.s32.
static __attribute__((device)) __inline__ int __dp2a_lo(short2 srcA, char4 srcB, int c) {
    const int packedA = *(int *)&srcA;
    const int packedB = *(int *)&srcB;
    int result;
    asm volatile ("dp2a.lo.s32.s32 %0, %1, %2, %3;" : "=r"(result) : "r"(packedA), "r"(packedB), "r"(c));
    return result;
}

// ushort2/uchar4 overload: both vectors are reinterpreted as unsigned int through
// a pointer cast and passed, with c, to the PTX instruction dp2a.lo.u32.u32.
static __attribute__((device)) __inline__ unsigned int __dp2a_lo(ushort2 srcA, uchar4 srcB, unsigned int c) {
    const unsigned int packedA = *(unsigned int *)&srcA;
    const unsigned int packedB = *(unsigned int *)&srcB;
    unsigned int result;
    asm volatile ("dp2a.lo.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(packedA), "r"(packedB), "r"(c));
    return result;
}


// Issues the PTX instruction dp2a.hi.s32.s32 with srcA, srcB and c as inputs
// and returns the register value the instruction writes.
static __attribute__((device)) __inline__ int __dp2a_hi(int srcA, int srcB, int c) {
    int result;
    asm volatile ("dp2a.hi.s32.s32 %0, %1, %2, %3;" : "=r"(result) : "r"(srcA), "r"(srcB), "r"(c));
    return result;
}

// Issues the PTX instruction dp2a.hi.u32.u32 with srcA, srcB and c as inputs
// and returns the register value the instruction writes.
static __attribute__((device)) __inline__ unsigned int __dp2a_hi(unsigned int srcA, unsigned int srcB, unsigned int c) {
    unsigned int result;
    asm volatile ("dp2a.hi.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(srcA), "r"(srcB), "r"(c));
    return result;
}

// short2/char4 overload: both vectors are reinterpreted as int through a
// pointer cast and passed, with c, to the PTX instruction dp2a.hi.s32.s32.
static __attribute__((device)) __inline__ int __dp2a_hi(short2 srcA, char4 srcB, int c) {
    const int packedA = *(int *)&srcA;
    const int packedB = *(int *)&srcB;
    int result;
    asm volatile ("dp2a.hi.s32.s32 %0, %1, %2, %3;" : "=r"(result) : "r"(packedA), "r"(packedB), "r"(c));
    return result;
}

// ushort2/uchar4 overload: both vectors are reinterpreted as unsigned int through
// a pointer cast and passed, with c, to the PTX instruction dp2a.hi.u32.u32.
static __attribute__((device)) __inline__ unsigned int __dp2a_hi(ushort2 srcA, uchar4 srcB, unsigned int c) {
    const unsigned int packedA = *(unsigned int *)&srcA;
    const unsigned int packedB = *(unsigned int *)&srcB;
    unsigned int result;
    asm volatile ("dp2a.hi.u32.u32 %0, %1, %2, %3;" : "=r"(result) : "r"(packedA), "r"(packedB), "r"(c));
    return result;
}
# 321 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 349 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 1 3
# 77 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 3
# 1 "/usr/local/cuda-11.7/include/builtin_types.h" 1 3
# 78 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 2 3
# 1 "/usr/local/cuda-11.7/include/crt/host_defines.h" 1 3
# 79 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 2 3
# 587 "/usr/local/cuda-11.7/include/crt/math_functions.hpp" 3
// float overload of logb: forwards to logbf and returns its result.
static inline __attribute__((device)) float logb(const float a)
{
  const float result = logbf(a);
  return result;
}

// float overload of ilogb: forwards to ilogbf and returns its result.
static inline __attribute__((device)) int ilogb(const float a)
{
  const int result = ilogbf(a);
  return result;
}

// float overload of scalbn: forwards both arguments to scalbnf.
static inline __attribute__((device)) float scalbn(const float a, const int b)
{
  const float result = scalbnf(a, b);
  return result;
}

// float overload of scalbln: forwards both arguments to scalblnf.
static inline __attribute__((device)) float scalbln(const float a, const long int b)
{
  const float result = scalblnf(a, b);
  return result;
}

// float overload of exp2: forwards to exp2f and returns its result.
static inline __attribute__((device)) float exp2(const float a)
{
  const float result = exp2f(a);
  return result;
}

// float overload of expm1: forwards to expm1f and returns its result.
static inline __attribute__((device)) float expm1(const float a)
{
  const float result = expm1f(a);
  return result;
}

// float overload of log2: forwards to log2f and returns its result.
static inline __attribute__((device)) float log2(const float a)
{
  const float result = log2f(a);
  return result;
}

// float overload of log1p: forwards to log1pf and returns its result.
static inline __attribute__((device)) float log1p(const float a)
{
  const float result = log1pf(a);
  return result;
}

// float overload of acosh: forwards to acoshf and returns its result.
static inline __attribute__((device)) float acosh(const float a)
{
  const float result = acoshf(a);
  return result;
}

// float overload of asinh: forwards to asinhf and returns its result.
static inline __attribute__((device)) float asinh(const float a)
{
  const float result = asinhf(a);
  return result;
}

// float overload of atanh: forwards to atanhf and returns its result.
static inline __attribute__((device)) float atanh(const float a)
{
  const float result = atanhf(a);
  return result;
}

// float overload of hypot: forwards both arguments to hypotf.
static inline __attribute__((device)) float hypot(const float a, const float b)
{
  const float result = hypotf(a, b);
  return result;
}

// float overload of cbrt: forwards to cbrtf and returns its result.
static inline __attribute__((device)) float cbrt(const float a)
{
  const float result = cbrtf(a);
  return result;
}

// float overload of erf: forwards to erff and returns its result.
static inline __attribute__((device)) float erf(const float a)
{
  const float result = erff(a);
  return result;
}

// float overload of erfc: forwards to erfcf and returns its result.
static inline __attribute__((device)) float erfc(const float a)
{
  const float result = erfcf(a);
  return result;
}

// float overload of lgamma: forwards to lgammaf and returns its result.
static inline __attribute__((device)) float lgamma(const float a)
{
  const float result = lgammaf(a);
  return result;
}

// float overload of tgamma: forwards to tgammaf and returns its result.
static inline __attribute__((device)) float tgamma(const float a)
{
  const float result = tgammaf(a);
  return result;
}

// float overload of copysign: forwards both arguments to copysignf.
static inline __attribute__((device)) float copysign(const float a, const float b)
{
  const float result = copysignf(a, b);
  return result;
}

// float overload of nextafter: forwards both arguments to nextafterf.
static inline __attribute__((device)) float nextafter(const float a, const float b)
{
  const float result = nextafterf(a, b);
  return result;
}

// float overload of remainder: forwards both arguments to remainderf.
static inline __attribute__((device)) float remainder(const float a, const float b)
{
  const float result = remainderf(a, b);
  return result;
}

// float overload of remquo: forwards a, b and the quo pointer to remquof.
static inline __attribute__((device)) float remquo(const float a, const float b, int *quo)
{
  const float result = remquof(a, b, quo);
  return result;
}

// float overload of round: forwards to roundf and returns its result.
static inline __attribute__((device)) float round(const float a)
{
  const float result = roundf(a);
  return result;
}

// float overload of lround: forwards to lroundf and returns its result.
static inline __attribute__((device)) long int lround(const float a)
{
  const long int result = lroundf(a);
  return result;
}

// float overload of llround: forwards to llroundf and returns its result.
static inline __attribute__((device)) long long int llround(const float a)
{
  const long long int result = llroundf(a);
  return result;
}

// float overload of trunc: forwards to truncf and returns its result.
static inline __attribute__((device)) float trunc(const float a)
{
  const float result = truncf(a);
  return result;
}

// float overload of rint: forwards to rintf and returns its result.
static inline __attribute__((device)) float rint(const float a)
{
  const float result = rintf(a);
  return result;
}

// float overload of lrint: forwards to lrintf and returns its result.
static inline __attribute__((device)) long int lrint(const float a)
{
  const long int result = lrintf(a);
  return result;
}

// float overload of llrint: forwards to llrintf and returns its result.
static inline __attribute__((device)) long long int llrint(const float a)
{
  const long long int result = llrintf(a);
  return result;
}

// float overload of nearbyint: forwards to nearbyintf and returns its result.
static inline __attribute__((device)) float nearbyint(const float a)
{
  const float result = nearbyintf(a);
  return result;
}

// float overload of fdim: forwards both arguments to fdimf.
static inline __attribute__((device)) float fdim(const float a, const float b)
{
  const float result = fdimf(a, b);
  return result;
}

// float overload of fma: forwards all three arguments to fmaf.
static inline __attribute__((device)) float fma(const float a, const float b, const float c)
{
  const float result = fmaf(a, b, c);
  return result;
}

// float overload of fmax: forwards both arguments to fmaxf.
static inline __attribute__((device)) float fmax(const float a, const float b)
{
  const float result = fmaxf(a, b);
  return result;
}

// float overload of fmin: forwards both arguments to fminf.
static inline __attribute__((device)) float fmin(const float a, const float b)
{
  const float result = fminf(a, b);
  return result;
}


// float overload of exp10: forwards to exp10f and returns its result.
static inline __attribute__((device)) float exp10(const float a)
{
  const float result = exp10f(a);
  return result;
}

// float overload of rsqrt: forwards to rsqrtf and returns its result.
static inline __attribute__((device)) float rsqrt(const float a)
{
  const float result = rsqrtf(a);
  return result;
}

// float overload of rcbrt: forwards to rcbrtf and returns its result.
static inline __attribute__((device)) float rcbrt(const float a)
{
  const float result = rcbrtf(a);
  return result;
}

// float overload of sinpi: forwards to sinpif and returns its result.
static inline __attribute__((device)) float sinpi(const float a)
{
  const float result = sinpif(a);
  return result;
}

// float overload of cospi: forwards to cospif and returns its result.
static inline __attribute__((device)) float cospi(const float a)
{
  const float result = cospif(a);
  return result;
}

// float overload of sincospi: hands a and both output pointers, unchanged,
// to sincospif. Nothing is returned.
static inline __attribute__((device)) void
sincospi(const float a, float *const sptr, float *const cptr)
{
  sincospif(a, sptr, cptr);
}

// float overload of sincos: hands a and both output pointers, unchanged,
// to sincosf. Nothing is returned.
static inline __attribute__((device)) void
sincos(const float a, float *const sptr, float *const cptr)
{
  sincosf(a, sptr, cptr);
}

// float overload of j0: forwards to j0f and returns its result.
static inline __attribute__((device)) float j0(const float a)
{
  const float result = j0f(a);
  return result;
}

// float overload of j1: forwards to j1f and returns its result.
static inline __attribute__((device)) float j1(const float a)
{
  const float result = j1f(a);
  return result;
}

// float overload of jn: forwards n and a to jnf and returns its result.
static inline __attribute__((device)) float jn(const int n, const float a)
{
  const float result = jnf(n, a);
  return result;
}

// float overload of y0: forwards to y0f and returns its result.
static inline __attribute__((device)) float y0(const float a)
{
  const float result = y0f(a);
  return result;
}

// float overload of y1: forwards to y1f and returns its result.
static inline __attribute__((device)) float y1(const float a)
{
  const float result = y1f(a);
  return result;
}

// float overload of yn: forwards n and a to ynf and returns its result.
static inline __attribute__((device)) float yn(const int n, const float a)
{
  const float result = ynf(n, a);
  return result;
}

// float overload of cyl_bessel_i0: forwards to cyl_bessel_i0f.
static inline __attribute__((device)) float cyl_bessel_i0(const float a)
{
  const float result = cyl_bessel_i0f(a);
  return result;
}

// float overload of cyl_bessel_i1: forwards to cyl_bessel_i1f.
static inline __attribute__((device)) float cyl_bessel_i1(const float a)
{
  const float result = cyl_bessel_i1f(a);
  return result;
}

// float overload of erfinv: forwards to erfinvf and returns its result.
static inline __attribute__((device)) float erfinv(const float a)
{
  const float result = erfinvf(a);
  return result;
}

// float overload of erfcinv: forwards to erfcinvf and returns its result.
static inline __attribute__((device)) float erfcinv(const float a)
{
  const float result = erfcinvf(a);
  return result;
}

// float overload of normcdfinv: forwards to normcdfinvf and returns its result.
static inline __attribute__((device)) float normcdfinv(const float a)
{
  const float result = normcdfinvf(a);
  return result;
}

// float overload of normcdf: forwards to normcdff and returns its result.
static inline __attribute__((device)) float normcdf(const float a)
{
  const float result = normcdff(a);
  return result;
}

// float overload of erfcx: forwards to erfcxf and returns its result.
static inline __attribute__((device)) float erfcx(const float a)
{
  const float result = erfcxf(a);
  return result;
}

// Mixed (double, float) overload: widens b to double, then calls copysign
// on the two double values.
static inline __attribute__((device)) double copysign(const double a, const float b)
{
  const double signSource = static_cast<double>(b);
  return copysign(a, signSource);
}

// Mixed (float, double) overload: widens a to double, then calls copysign
// on the two double values.
static inline __attribute__((device)) double copysign(const float a, const double b)
{
  const double magnitude = static_cast<double>(a);
  return copysign(magnitude, b);
}

// unsigned int overload of min: forwards both arguments to umin.
static inline __attribute__((device)) unsigned int min(const unsigned int a, const unsigned int b)
{
  const unsigned int result = umin(a, b);
  return result;
}

// (int, unsigned int) overload of min: a is cast to unsigned int before
// both values are passed to umin.
static inline __attribute__((device)) unsigned int min(const int a, const unsigned int b)
{
  const unsigned int lhs = static_cast<unsigned int>(a);
  return umin(lhs, b);
}

// (unsigned int, int) overload of min: b is cast to unsigned int before
// both values are passed to umin.
static inline __attribute__((device)) unsigned int min(const unsigned int a, const int b)
{
  const unsigned int rhs = static_cast<unsigned int>(b);
  return umin(a, rhs);
}

// long int overload of min. The operand width is chosen by comparing
// sizeof(long int) with sizeof(int).
static inline __attribute__((device)) long int min(const long int a, const long int b)
{
  // Same size as int: call min on the operands cast to int.
  if (sizeof(long int) == sizeof(int)) {
    return static_cast<long int>(min(static_cast<int>(a), static_cast<int>(b)));
  }
  // Otherwise call llmin on the operands cast to long long int.
  return static_cast<long int>(llmin(static_cast<long long int>(a), static_cast<long long int>(b)));
}

// unsigned long int overload of min. The operand width is chosen by comparing
// sizeof(unsigned long int) with sizeof(unsigned int).
static inline __attribute__((device)) unsigned long int min(const unsigned long int a, const unsigned long int b)
{
  // Same size as unsigned int: call umin on the operands cast to unsigned int.
  if (sizeof(unsigned long int) == sizeof(unsigned int)) {
    return static_cast<unsigned long int>(umin(static_cast<unsigned int>(a), static_cast<unsigned int>(b)));
  }
  // Otherwise call ullmin on the operands cast to unsigned long long int.
  return static_cast<unsigned long int>(ullmin(static_cast<unsigned long long int>(a), static_cast<unsigned long long int>(b)));
}

// (long int, unsigned long int) overload of min. Both operands are cast to an
// unsigned type; the width is chosen by comparing sizeof(unsigned long int)
// with sizeof(unsigned int).
static inline __attribute__((device)) unsigned long int min(const long int a, const unsigned long int b)
{
  // Same size as unsigned int: call umin on the operands cast to unsigned int.
  if (sizeof(unsigned long int) == sizeof(unsigned int)) {
    return static_cast<unsigned long int>(umin(static_cast<unsigned int>(a), static_cast<unsigned int>(b)));
  }
  // Otherwise call ullmin on the operands cast to unsigned long long int.
  return static_cast<unsigned long int>(ullmin(static_cast<unsigned long long int>(a), static_cast<unsigned long long int>(b)));
}

// (unsigned long int, long int) overload of min. Both operands are cast to an
// unsigned type; the width is chosen by comparing sizeof(unsigned long int)
// with sizeof(unsigned int).
static inline __attribute__((device)) unsigned long int min(const unsigned long int a, const long int b)
{
  // Same size as unsigned int: call umin on the operands cast to unsigned int.
  if (sizeof(unsigned long int) == sizeof(unsigned int)) {
    return static_cast<unsigned long int>(umin(static_cast<unsigned int>(a), static_cast<unsigned int>(b)));
  }
  // Otherwise call ullmin on the operands cast to unsigned long long int.
  return static_cast<unsigned long int>(ullmin(static_cast<unsigned long long int>(a), static_cast<unsigned long long int>(b)));
}

// long long int overload of min: forwards both arguments to llmin.
static inline __attribute__((device)) long long int min(const long long int a, const long long int b)
{
  const long long int result = llmin(a, b);
  return result;
}

// unsigned long long int overload of min: forwards both arguments to ullmin.
static inline __attribute__((device)) unsigned long long int min(const unsigned long long int a, const unsigned long long int b)
{
  const unsigned long long int result = ullmin(a, b);
  return result;
}

// (long long int, unsigned long long int) overload of min: a is cast to
// unsigned long long int before both values are passed to ullmin.
static inline __attribute__((device)) unsigned long long int min(const long long int a, const unsigned long long int b)
{
  const unsigned long long int lhs = static_cast<unsigned long long int>(a);
  return ullmin(lhs, b);
}

// (unsigned long long int, long long int) overload of min: b is cast to
// unsigned long long int before both values are passed to ullmin.
static inline __attribute__((device)) unsigned long long int min(const unsigned long long int a, const long long int b)
{
  const unsigned long long int rhs = static_cast<unsigned long long int>(b);
  return ullmin(a, rhs);
}

// float overload of min: forwards both arguments to fminf.
static inline __attribute__((device)) float min(const float a, const float b)
{
  const float result = fminf(a, b);
  return result;
}

// double overload of min: forwards both arguments to fmin.
static inline __attribute__((device)) double min(const double a, const double b)
{
  const double result = fmin(a, b);
  return result;
}

// (float, double) overload of min: a is widened to double before both
// values are passed to fmin.
static inline __attribute__((device)) double min(const float a, const double b)
{
  const double lhs = static_cast<double>(a);
  return fmin(lhs, b);
}

// (double, float) overload of min: b is widened to double before both
// values are passed to fmin.
static inline __attribute__((device)) double min(const double a, const float b)
{
  const double rhs = static_cast<double>(b);
  return fmin(a, rhs);
}

// unsigned int overload of max: forwards both arguments to umax.
static inline __attribute__((device)) unsigned int max(const unsigned int a, const unsigned int b)
{
  const unsigned int result = umax(a, b);
  return result;
}

// (int, unsigned int) overload of max: a is cast to unsigned int before
// both values are passed to umax.
static inline __attribute__((device)) unsigned int max(const int a, const unsigned int b)
{
  const unsigned int lhs = static_cast<unsigned int>(a);
  return umax(lhs, b);
}

// (unsigned int, int) overload of max: b is cast to unsigned int before
// both values are passed to umax.
static inline __attribute__((device)) unsigned int max(const unsigned int a, const int b)
{
  const unsigned int rhs = static_cast<unsigned int>(b);
  return umax(a, rhs);
}

// long int overload of max. The operand width is chosen by comparing
// sizeof(long int) with sizeof(int).
static inline __attribute__((device)) long int max(const long int a, const long int b)
{
  // Same size as int: call max on the operands cast to int.
  if (sizeof(long int) == sizeof(int)) {
    return static_cast<long int>(max(static_cast<int>(a), static_cast<int>(b)));
  }
  // Otherwise call llmax on the operands cast to long long int.
  return static_cast<long int>(llmax(static_cast<long long int>(a), static_cast<long long int>(b)));
}

// unsigned long int overload of max. The operand width is chosen by comparing
// sizeof(unsigned long int) with sizeof(unsigned int).
static inline __attribute__((device)) unsigned long int max(const unsigned long int a, const unsigned long int b)
{
  // Same size as unsigned int: call umax on the operands cast to unsigned int.
  if (sizeof(unsigned long int) == sizeof(unsigned int)) {
    return static_cast<unsigned long int>(umax(static_cast<unsigned int>(a), static_cast<unsigned int>(b)));
  }
  // Otherwise call ullmax on the operands cast to unsigned long long int.
  return static_cast<unsigned long int>(ullmax(static_cast<unsigned long long int>(a), static_cast<unsigned long long int>(b)));
}

// (long int, unsigned long int) overload of max. Both operands are cast to an
// unsigned type; the width is chosen by comparing sizeof(unsigned long int)
// with sizeof(unsigned int).
static inline __attribute__((device)) unsigned long int max(const long int a, const unsigned long int b)
{
  // Same size as unsigned int: call umax on the operands cast to unsigned int.
  if (sizeof(unsigned long int) == sizeof(unsigned int)) {
    return static_cast<unsigned long int>(umax(static_cast<unsigned int>(a), static_cast<unsigned int>(b)));
  }
  // Otherwise call ullmax on the operands cast to unsigned long long int.
  return static_cast<unsigned long int>(ullmax(static_cast<unsigned long long int>(a), static_cast<unsigned long long int>(b)));
}

// (unsigned long int, long int) overload of max. Both operands are cast to an
// unsigned type; the width is chosen by comparing sizeof(unsigned long int)
// with sizeof(unsigned int).
static inline __attribute__((device)) unsigned long int max(const unsigned long int a, const long int b)
{
  // Same size as unsigned int: call umax on the operands cast to unsigned int.
  if (sizeof(unsigned long int) == sizeof(unsigned int)) {
    return static_cast<unsigned long int>(umax(static_cast<unsigned int>(a), static_cast<unsigned int>(b)));
  }
  // Otherwise call ullmax on the operands cast to unsigned long long int.
  return static_cast<unsigned long int>(ullmax(static_cast<unsigned long long int>(a), static_cast<unsigned long long int>(b)));
}

// long long int overload of max: forwards both arguments to llmax.
static inline __attribute__((device)) long long int max(const long long int a, const long long int b)
{
  const long long int result = llmax(a, b);
  return result;
}

// unsigned long long int overload of max: forwards both arguments to ullmax.
static inline __attribute__((device)) unsigned long long int max(const unsigned long long int a, const unsigned long long int b)
{
  const unsigned long long int result = ullmax(a, b);
  return result;
}

// (long long int, unsigned long long int) overload of max: a is cast to
// unsigned long long int before both values are passed to ullmax.
static inline __attribute__((device)) unsigned long long int max(const long long int a, const unsigned long long int b)
{
  const unsigned long long int lhs = static_cast<unsigned long long int>(a);
  return ullmax(lhs, b);
}

// (unsigned long long int, long long int) overload of max: b is cast to
// unsigned long long int before both values are passed to ullmax.
static inline __attribute__((device)) unsigned long long int max(const unsigned long long int a, const long long int b)
{
  const unsigned long long int rhs = static_cast<unsigned long long int>(b);
  return ullmax(a, rhs);
}

// float overload of max: forwards both arguments to fmaxf.
static inline __attribute__((device)) float max(const float a, const float b)
{
  const float result = fmaxf(a, b);
  return result;
}

// double overload of max: forwards both arguments to fmax.
static inline __attribute__((device)) double max(const double a, const double b)
{
  const double result = fmax(a, b);
  return result;
}

// (float, double) overload of max: a is widened to double before both
// values are passed to fmax.
static inline __attribute__((device)) double max(const float a, const double b)
{
  const double lhs = static_cast<double>(a);
  return fmax(lhs, b);
}

// (double, float) overload of max: b is widened to double before both
// values are passed to fmax.
static inline __attribute__((device)) double max(const double a, const float b)
{
  const double rhs = static_cast<double>(b);
  return fmax(a, rhs);
}
# 350 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 365 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 1 3
# 54 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
namespace {


namespace __cuda_tex {
# 72 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
// Helper for __tex_len: scans s from index i and stops at the first zero
// byte or at index 32, whichever comes first. Written as a single return
// expression so it remains a valid C++11 constexpr function.
constexpr int __tex_len_from(const char *s, int i) {
  return (i == 32 || s[i] == 0) ? i : __tex_len_from(s, i + 1);
}

// Returns the index of the first zero byte within s[0..31], or 32 when none
// of those bytes is zero. Bytes after the first zero are never read.
constexpr int __tex_len(const char *s) {
  return __tex_len_from(s, 0);
}

// Lookup table used by __tex_op_hash: maps a character code to an integer
// weight and yields 225 for every code not listed. The keys are written as
// character literals; in ASCII they equal the codes 49..120 that they stand for.
constexpr int __tex_hash_map(int c) {
  return (c == '1')   ? 10
         : (c == '2') ? 0
         : (c == '3') ? 100
         : (c == '4') ? 30
         : (c == 'C') ? 10
         : (c == 'D') ? 0
         : (c == 'E') ? 25
         : (c == 'H') ? 70
         : (c == 'M') ? 0
         : (c == '`') ? 44
         : (c == 'c') ? 10
         : (c == 'd') ? 5
         : (c == 'e') ? 60
         : (c == 'f') ? 40
         : (c == 'g') ? 70
         : (c == 'h') ? 25
         : (c == 'p') ? 0
         : (c == 'r') ? 45
         : (c == 'u') ? 5
         : (c == 'v') ? 85
         : (c == 'x') ? 20
                      : 225;
}

// Compile-time hash of a texture operation name. The value is the sum of
// __tex_len(str) and the __tex_hash_map weights of the last character,
// str[5], str[6] and str[7] + 1. str[5..7] are read unconditionally.
constexpr int __tex_op_hash(const char *str) {
  return __tex_hash_map(str[__tex_len(str) - 1]) + __tex_hash_map(str[5]) +
         __tex_hash_map(str[6]) + __tex_hash_map(str[7] + 1) + __tex_len(str);
}


template <int N> struct __Tag;
# 148 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
// Type trait with two members: __base_t (a scalar type) and __fetch_t (a
// four-component vector type). The explicit specializations below cover the
// scalar types; the generic definition at the end handles every other type.
template <class> struct __TypeInfoT;

template <> struct __TypeInfoT<float> {
  typedef float __base_t;
  typedef float4 __fetch_t;
};
template <> struct __TypeInfoT<char> {
  typedef char __base_t;
  typedef int4 __fetch_t;
};
template <> struct __TypeInfoT<signed char> {
  typedef signed char __base_t;
  typedef int4 __fetch_t;
};
template <> struct __TypeInfoT<unsigned char> {
  typedef unsigned char __base_t;
  typedef uint4 __fetch_t;
};
template <> struct __TypeInfoT<short> {
  typedef short __base_t;
  typedef int4 __fetch_t;
};
template <> struct __TypeInfoT<unsigned short> {
  typedef unsigned short __base_t;
  typedef uint4 __fetch_t;
};
template <> struct __TypeInfoT<int> {
  typedef int __base_t;
  typedef int4 __fetch_t;
};
template <> struct __TypeInfoT<unsigned int> {
  typedef unsigned int __base_t;
  typedef uint4 __fetch_t;
};

// Generic case: __base_t is the declared type of the member `x` of __T, and
// __fetch_t is taken from the specialization for that base type.
template <class __T> struct __TypeInfoT {
  typedef decltype(__T::x) __base_t;
  typedef typename __TypeInfoT<__base_t>::__fetch_t __fetch_t;
};


template <class __op> struct __tex_fetch_v4;
# 302 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
// Two explicit specializations of __tex_fetch_v4, selected by the hashes of
// "__tex1D_v2" and "__tex1D_rmnf_v2". Each declares a static member template
// __run(cudaTextureObject_t, float) and specializes it per fetch type; every
// specialization issues a tex.1d.v4 PTX instruction with a .s32/.u32/.f32
// result type and an f32 coordinate.
// In the "_rmnf_v2" struct, __run returns float4 for both the int4 and uint4
// specializations, and the asm outputs are bound with "r" constraints.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1D_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x) { int4 __r; asm("tex.1d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x) { uint4 __r; asm("tex.1d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x) { float4 __r; asm("tex.1d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1D_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x) { float4 __r; asm("tex.1d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x) { float4 __r; asm("tex.1d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x)); return __r; } };

// Two explicit specializations of __tex_fetch_v4, selected by the hashes of
// "__tex1Dfetch_v2" and "__tex1Dfetch_rmnf_v2". Each declares a static member
// template __run(cudaTextureObject_t, int) and specializes it per fetch type;
// every specialization issues a tex.1d.v4 PTX instruction with a
// .s32/.u32/.f32 result type and an s32 coordinate passed in an "r" register.
// In the "_rmnf_v2" struct, __run returns float4 for both the int4 and uint4
// specializations, and the asm outputs are bound with "r" constraints.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1Dfetch_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, int __x); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, int __x) { int4 __r; asm("tex.1d.v4" ".s32." "s32" "\t" "{%0, %1, %2, %3}, [%4, {%5}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__x)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, int __x) { uint4 __r; asm("tex.1d.v4" ".u32." "s32" "\t" "{%0, %1, %2, %3}, [%4, {%5}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__x)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, int __x) { float4 __r; asm("tex.1d.v4" ".f32." "s32" "\t" "{%0, %1, %2, %3}, [%4, {%5}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "r"(__x)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1Dfetch_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, int __x); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, int __x) { float4 __r; asm("tex.1d.v4" ".s32." "s32" "\t" "{%0, %1, %2, %3}, [%4, {%5}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__x)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, int __x) { float4 __r; asm("tex.1d.v4" ".u32." "s32" "\t" "{%0, %1, %2, %3}, [%4, {%5}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__x)); return __r; } };

// The specialization keyed by the hash of "__itex1D" adds no members of its
// own; it derives from the specialization keyed by the hash of "__tex1D_v2".
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex1D")> > : __tex_fetch_v4<__Tag<__tex_op_hash("__tex1D_v2")> > {};
// The specialization keyed by the hash of "__itex1Dfetch" adds no members of its
// own; it derives from the specialization keyed by the hash of "__tex1Dfetch_v2".
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex1Dfetch")> > : __tex_fetch_v4<__Tag<__tex_op_hash("__tex1Dfetch_v2")> > {};

// Two explicit specializations of __tex_fetch_v4, selected by the hashes of
// "__tex1DGrad_v2" and "__tex1DGrad_rmnf_v2". Each declares a static member
// template __run(cudaTextureObject_t, float __x, float __dPdx, float __dPdy)
// and specializes it per fetch type; every specialization issues a
// tex.grad.1d.v4 PTX instruction with a .s32/.u32/.f32 result type, passing
// __x, __dPdx and __dPdy as f32 operands in that order.
// In the "_rmnf_v2" struct, __run returns float4 for both the int4 and uint4
// specializations, and the asm outputs are bound with "r" constraints.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DGrad_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __dPdx, float __dPdy); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __dPdx, float __dPdy) { int4 __r; asm("tex.grad.1d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __dPdx, float __dPdy) { uint4 __r; asm("tex.grad.1d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __dPdx, float __dPdy) { float4 __r; asm("tex.grad.1d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DGrad_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __dPdx, float __dPdy); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __dPdx, float __dPdy) { float4 __r; asm("tex.grad.1d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __dPdx, float __dPdy) { float4 __r; asm("tex.grad.1d.v4" ".u32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}], {%6}, {%7};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__dPdx), "f"(__dPdy)); return __r; } };


// The specialization keyed by the hash of "__itex1DGrad" adds no members of its
// own; it derives from the specialization keyed by the hash of "__tex1DGrad_v2".
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex1DGrad")> > : __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DGrad_v2")> > {};

// Fetch specializations for the "__tex1DLayered_v2" and
// "__tex1DLayered_rmnf_v2" tags.
//  * "_v2": __run<T> is specialized for int4, uint4 and float4; each issues
//    tex.a1d.v4 with a .s32/.u32/.f32 result type and returns the four outputs
//    packed in __r.
//  * "_rmnf_v2": __run<T> always returns float4 and is specialized only for
//    int4 and uint4; the .s32/.u32 results are bound to the float4 fields with
//    "r" output constraints.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __layer, %6 = __x.
// The layer is listed before the coordinate in the input list, so it comes
// first inside the {%5, %6} vector even though __x precedes __layer in the
// C++ signature.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLayered_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, int __layer); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, int __layer) { int4 __r; asm("tex.a1d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, int __layer) { uint4 __r; asm("tex.a1d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, int __layer) { float4 __r; asm("tex.a1d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLayered_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, int __layer); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, int __layer) { float4 __r; asm("tex.a1d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, int __layer) { float4 __r; asm("tex.a1d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x)); return __r; } };


// The "__itex1DLayered" tag adds nothing of its own: it inherits every __run
// specialization from the "__tex1DLayered_v2" implementation.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex1DLayered")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLayered_v2")> > {
};

// Fetch specializations for the "__tex1DLayeredGrad_v2" and
// "__tex1DLayeredGrad_rmnf_v2" tags.
//  * "_v2": __run<T> is specialized for int4, uint4 and float4; each issues
//    tex.grad.a1d.v4 with a .s32/.u32/.f32 result type and returns the four
//    outputs packed in __r.
//  * "_rmnf_v2": __run<T> always returns float4 and is specialized only for
//    int4 and uint4; the .s32/.u32 results are bound to the float4 fields with
//    "r" output constraints.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __layer, %6 = __x,
// %7 = __dPdx, %8 = __dPdy.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLayeredGrad_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, int __layer, float __dPdx, float __dPdy); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, int __layer, float __dPdx, float __dPdy) { int4 __r; asm("tex.grad.a1d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, int __layer, float __dPdx, float __dPdy) { uint4 __r; asm("tex.grad.a1d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, int __layer, float __dPdx, float __dPdy) { float4 __r; asm("tex.grad.a1d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLayeredGrad_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, int __layer, float __dPdx, float __dPdy); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, int __layer, float __dPdx, float __dPdy) { float4 __r; asm("tex.grad.a1d.v4" ".s32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, int __layer, float __dPdx, float __dPdy) { float4 __r; asm("tex.grad.a1d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7}, {%8};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__dPdx), "f"(__dPdy)); return __r; } };


// The "__itex1DLayeredGrad" tag adds nothing of its own: it inherits every
// __run specialization from the "__tex1DLayeredGrad_v2" implementation.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex1DLayeredGrad")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLayeredGrad_v2")> > {
};

// Fetch specializations for the "__tex1DLayeredLod_v2" and
// "__tex1DLayeredLod_rmnf_v2" tags.
//  * "_v2": __run<T> is specialized for int4, uint4 and float4; each issues
//    tex.level.a1d.v4 with a .s32/.u32/.f32 result type and returns the four
//    outputs packed in __r.
//  * "_rmnf_v2": __run<T> always returns float4 and is specialized only for
//    int4 and uint4; the .s32/.u32 results are bound to the float4 fields with
//    "r" output constraints.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __layer, %6 = __x,
// %7 = __level (passed as a bare operand, not inside a brace vector).
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLayeredLod_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, int __layer, float __level); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, int __layer, float __level) { int4 __r; asm("tex.level.a1d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__level)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, int __layer, float __level) { uint4 __r; asm("tex.level.a1d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, int __layer, float __level) { float4 __r; asm("tex.level.a1d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__level)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLayeredLod_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, int __layer, float __level); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, int __layer, float __level) { float4 __r; asm("tex.level.a1d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, int __layer, float __level) { float4 __r; asm("tex.level.a1d.v4" ".u32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__level)); return __r; } };


// The "__itex1DLayeredLod" tag adds nothing of its own: it inherits every
// __run specialization from the "__tex1DLayeredLod_v2" implementation.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex1DLayeredLod")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLayeredLod_v2")> > {
};

// Fetch specializations for the "__tex1DLod_v2" and "__tex1DLod_rmnf_v2" tags.
//  * "_v2": __run<T> is specialized for int4, uint4 and float4; each issues
//    tex.level.1d.v4 with a .s32/.u32/.f32 result type and returns the four
//    outputs packed in __r.
//  * "_rmnf_v2": __run<T> always returns float4 and is specialized only for
//    int4 and uint4; the .s32/.u32 results are bound to the float4 fields with
//    "r" output constraints.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __x, %6 = __level.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLod_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __level); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __level) { int4 __r; asm("tex.level.1d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}], %6;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__level)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __level) { uint4 __r; asm("tex.level.1d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}], %6;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __level) { float4 __r; asm("tex.level.1d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}], %6;" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__level)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLod_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __level); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __level) { float4 __r; asm("tex.level.1d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}], %6;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __level) { float4 __r; asm("tex.level.1d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5}], %6;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__level)); return __r; } };


// The "__itex1DLod" tag adds nothing of its own: it inherits every __run
// specialization from the "__tex1DLod_v2" implementation.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex1DLod")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex1DLod_v2")> > {
};


// Fetch specializations for the "__tex2D_v2" and "__tex2D_rmnf_v2" tags.
//  * "_v2": __run<T> is specialized for int4, uint4 and float4; each issues
//    tex.2d.v4 with a .s32/.u32/.f32 result type and returns the four outputs
//    packed in __r.
//  * "_rmnf_v2": __run<T> always returns float4 and is specialized only for
//    int4 and uint4; the .s32/.u32 results are bound to the float4 fields with
//    "r" output constraints.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __x, %6 = __y.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2D_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y) { int4 __r; asm("tex.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y) { uint4 __r; asm("tex.2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y) { float4 __r; asm("tex.2d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2D_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y) { float4 __r; asm("tex.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y) { float4 __r; asm("tex.2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } };

// The "__itex2D" tag adds nothing of its own: it inherits every __run
// specialization from the "__tex2D_v2" implementation.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex2D")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex2D_v2")> > {
};


                                ;

// Fetch specializations for the "__tex2DGrad_v2" and "__tex2DGrad_rmnf_v2" tags.
//  * "_v2": __run<T> is specialized for int4, uint4 and float4; each issues
//    tex.grad.2d.v4 with a .s32/.u32/.f32 result type and returns the four
//    outputs packed in __r.
//  * "_rmnf_v2": __run<T> always returns float4 and is specialized only for
//    int4 and uint4; the .s32/.u32 results are bound to the float4 fields with
//    "r" output constraints.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __x, %6 = __y,
// %7/%8 = __dPdx->x/.y, %9/%10 = __dPdy->x/.y.
// The gradients arrive as pointers and are dereferenced unconditionally, so
// both __dPdx and __dPdy must be non-null.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DGrad_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, const float2 *__dPdx, const float2 *__dPdy); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, const float2 *__dPdx, const float2 *__dPdy) { int4 __r; asm("tex.grad.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, const float2 *__dPdx, const float2 *__dPdy) { uint4 __r; asm("tex.grad.2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, const float2 *__dPdx, const float2 *__dPdy) { float4 __r; asm("tex.grad.2d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DGrad_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, const float2 *__dPdx, const float2 *__dPdy); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, const float2 *__dPdx, const float2 *__dPdy) { float4 __r; asm("tex.grad.2d.v4" ".s32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, const float2 *__dPdx, const float2 *__dPdy) { float4 __r; asm("tex.grad.2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], {%7, %8}, {%9, %10};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y)); return __r; } };


// The "__itex2DGrad_v2" tag adds nothing of its own: it inherits every __run
// specialization from the "__tex2DGrad_v2" implementation.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex2DGrad_v2")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DGrad_v2")> > {
};
# 368 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
                            ;

// Fetch specializations for the "__tex2DLayered_v2" and
// "__tex2DLayered_rmnf_v2" tags.
//  * "_v2": __run<T> is specialized for int4, uint4 and float4; each issues
//    tex.a2d.v4 with a .s32/.u32/.f32 result type and returns the four outputs
//    packed in __r.
//  * "_rmnf_v2": __run<T> always returns float4 and is specialized only for
//    int4 and uint4; the .s32/.u32 results are bound to the float4 fields with
//    "r" output constraints.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __layer, %6 = __x,
// %7 = __y.
// The coordinate vector is written with four slots, {%5, %6, %7, %7}: __y is
// repeated in the last slot.
// NOTE(review): presumably padding required by the a2d operand form -- confirm
// against the PTX ISA.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLayered_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, int __layer); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __layer) { int4 __r; asm("tex.a2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __layer) { uint4 __r; asm("tex.a2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, int __layer) { float4 __r; asm("tex.a2d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLayered_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, int __layer); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __layer) { float4 __r; asm("tex.a2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __layer) { float4 __r; asm("tex.a2d.v4" ".u32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y)); return __r; } };


// The "__itex2DLayered" tag adds nothing of its own: it inherits every __run
// specialization from the "__tex2DLayered_v2" implementation.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex2DLayered")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLayered_v2")> > {
};
# 383 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
                                              ;

// Fetch specializations for the "__tex2DLayeredGrad_v2" and
// "__tex2DLayeredGrad_rmnf_v2" tags.
//  * "_v2": __run<T> is specialized for int4, uint4 and float4; each issues
//    tex.grad.a2d.v4 with a .s32/.u32/.f32 result type and returns the four
//    outputs packed in __r.
//  * "_rmnf_v2": __run<T> always returns float4 and is specialized only for
//    int4 and uint4; the .s32/.u32 results are bound to the float4 fields with
//    "r" output constraints.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __layer, %6 = __x,
// %7 = __y (repeated to fill the fourth coordinate slot),
// %8/%9 = __dPdx->x/.y, %10/%11 = __dPdy->x/.y.
// The gradients arrive as pointers and are dereferenced unconditionally, so
// both __dPdx and __dPdy must be non-null.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLayeredGrad_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, int __layer, const float2 *__dPdx, const float2 *__dPdy); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __layer, const float2 *__dPdx, const float2 *__dPdy) { int4 __r; asm("tex.grad.a2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __layer, const float2 *__dPdx, const float2 *__dPdy) { uint4 __r; asm("tex.grad.a2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, int __layer, const float2 *__dPdx, const float2 *__dPdy) { float4 __r; asm("tex.grad.a2d.v4" ".f32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLayeredGrad_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, int __layer, const float2 *__dPdx, const float2 *__dPdy); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __layer, const float2 *__dPdx, const float2 *__dPdy) { float4 __r; asm("tex.grad.a2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __layer, const float2 *__dPdx, const float2 *__dPdy) { float4 __r; asm("tex.grad.a2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], {%8, %9}, {%10, %11};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdy->x), "f"(__dPdy->y)); return __r; } };


// The "__itex2DLayeredGrad_v2" tag adds nothing of its own: it inherits every
// __run specialization from the "__tex2DLayeredGrad_v2" implementation.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex2DLayeredGrad_v2")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLayeredGrad_v2")> > {
};
# 404 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
                                     ;

// Fetch specializations for the "__tex2DLayeredLod_v2" and
// "__tex2DLayeredLod_rmnf_v2" tags.
//  * "_v2": __run<T> is specialized for int4, uint4 and float4; each issues
//    tex.level.a2d.v4 with a .s32/.u32/.f32 result type and returns the four
//    outputs packed in __r.
//  * "_rmnf_v2": __run<T> always returns float4 and is specialized only for
//    int4 and uint4; the .s32/.u32 results are bound to the float4 fields with
//    "r" output constraints.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __layer, %6 = __x,
// %7 = __y (repeated to fill the fourth coordinate slot), %8 = __level.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLayeredLod_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, int __layer, float __level); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __layer, float __level) { int4 __r; asm("tex.level.a2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __layer, float __level) { uint4 __r; asm("tex.level.a2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, int __layer, float __level) { float4 __r; asm("tex.level.a2d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLayeredLod_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, int __layer, float __level); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __layer, float __level) { float4 __r; asm("tex.level.a2d.v4" ".s32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __layer, float __level) { float4 __r; asm("tex.level.a2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__level)); return __r; } };


// The "__itex2DLayeredLod" tag adds nothing of its own: it inherits every
// __run specialization from the "__tex2DLayeredLod_v2" implementation.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex2DLayeredLod")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLayeredLod_v2")> > {
};
# 420 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
                                                            ;

// Fetch specializations for the "__tex2DLod_v2" and "__tex2DLod_rmnf_v2" tags.
//  * "_v2": __run<T> is specialized for int4, uint4 and float4; each issues
//    tex.level.2d.v4 with a .s32/.u32/.f32 result type and returns the four
//    outputs packed in __r.
//  * "_rmnf_v2": __run<T> always returns float4 and is specialized only for
//    int4 and uint4; the .s32/.u32 results are bound to the float4 fields with
//    "r" output constraints.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __x, %6 = __y,
// %7 = __level.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLod_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, float __level); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __level) { int4 __r; asm("tex.level.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__level)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __level) { uint4 __r; asm("tex.level.2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, float __level) { float4 __r; asm("tex.level.2d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__level)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLod_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, float __level); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __level) { float4 __r; asm("tex.level.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __level) { float4 __r; asm("tex.level.2d.v4" ".u32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}], %7;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__level)); return __r; } };


// The "__itex2DLod" tag adds nothing of its own: it inherits every __run
// specialization from the "__tex2DLod_v2" implementation.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex2DLod")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex2DLod_v2")> > {
};
# 435 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
                                              ;
# 454 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
// Gather specializations for component index 0. The tag value is
// 10000 + __tex_op_hash(<op name>) * 100 + 0, so the component index is part
// of the tag itself.
//  * "__tex2Dgather_v2": __run<T> is specialized for int4, uint4 and float4;
//    each issues tld4.r.2d.v4 with a .s32/.u32/.f32 result type.
//  * "__tex2Dgather_rmnf_v2": __run<T> always returns float4 and is specialized
//    only for int4 and uint4, binding the results with "r" output constraints.
//  * "__itex2Dgather" (same component index) inherits from the "_v2" struct
//    without adding members.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __x, %6 = __y.
// The __comp parameter is accepted but never referenced in the bodies; the
// component is fixed by the ".r" in the instruction name.
template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 0> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, int __comp); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { int4 __r; asm("tld4.r.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { uint4 __r; asm("tld4.r.2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.r.2d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_rmnf_v2") * 100 + 0> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, int __comp); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.r.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.r.2d.v4" ".u32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__itex2Dgather") * 100 + 0> > : __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 0> > {}; ;;
// Gather specializations for component index 1. The tag value is
// 10000 + __tex_op_hash(<op name>) * 100 + 1, so the component index is part
// of the tag itself.
//  * "__tex2Dgather_v2": __run<T> is specialized for int4, uint4 and float4;
//    each issues tld4.g.2d.v4 with a .s32/.u32/.f32 result type.
//  * "__tex2Dgather_rmnf_v2": __run<T> always returns float4 and is specialized
//    only for int4 and uint4, binding the results with "r" output constraints.
//  * "__itex2Dgather" (same component index) inherits from the "_v2" struct
//    without adding members.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __x, %6 = __y.
// The __comp parameter is accepted but never referenced in the bodies; the
// component is fixed by the ".g" in the instruction name.
template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 1> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, int __comp); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { int4 __r; asm("tld4.g.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { uint4 __r; asm("tld4.g.2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.g.2d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_rmnf_v2") * 100 + 1> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, int __comp); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.g.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.g.2d.v4" ".u32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__itex2Dgather") * 100 + 1> > : __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 1> > {}; ;;
// Gather specializations for component index 2. The tag value is
// 10000 + __tex_op_hash(<op name>) * 100 + 2, so the component index is part
// of the tag itself.
//  * "__tex2Dgather_v2": __run<T> is specialized for int4, uint4 and float4;
//    each issues tld4.b.2d.v4 with a .s32/.u32/.f32 result type.
//  * "__tex2Dgather_rmnf_v2": __run<T> always returns float4 and is specialized
//    only for int4 and uint4, binding the results with "r" output constraints.
//  * "__itex2Dgather" (same component index) inherits from the "_v2" struct
//    without adding members.
// Operand map: %0-%3 = __r.x/.y/.z/.w, %4 = __obj, %5 = __x, %6 = __y.
// The __comp parameter is accepted but never referenced in the bodies; the
// component is fixed by the ".b" in the instruction name.
template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 2> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, int __comp); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { int4 __r; asm("tld4.b.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { uint4 __r; asm("tld4.b.2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.b.2d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_rmnf_v2") * 100 + 2> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, int __comp); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.b.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.b.2d.v4" ".u32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__itex2Dgather") * 100 + 2> > : __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 2> > {}; ;;
// Macro-expanded fetchers for the 2D gather op with the component fixed to
// index 3 (tag value: 10000 + __tex_op_hash(name) * 100 + 3).
//  - "__tex2Dgather_v2": __run<int4|uint4|float4> each issue one
//    tld4.a.2d.v4.{s32|u32|f32}.f32 with inputs %4 = __obj, %5 = __x,
//    %6 = __y and return the four outputs (%0..%3) packed in __r.
//  - "__tex2Dgather_rmnf_v2": the same instruction for the int4/uint4 fetch
//    types, but the result is declared and returned as float4.
//  - "__itex2Dgather" (component 3): inherits the "__tex2Dgather_v2" fetcher.
// __comp is not referenced by any body here; the component selector (".a") is
// part of the instruction mnemonic.
template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 3> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, int __comp); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { int4 __r; asm("tld4.a.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { uint4 __r; asm("tld4.a.2d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.a.2d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_rmnf_v2") * 100 + 3> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, int __comp); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.a.2d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, int __comp) { float4 __r; asm("tld4.a.2d.v4" ".u32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__itex2Dgather") * 100 + 3> > : __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 3> > {}; ;;


// Run-time component dispatcher for the "__tex2Dgather_v2" texture op.
//
// The per-component fetchers are separate __tex_fetch_v4 specializations keyed
// by the tag `10000 + __tex_op_hash(name) * 100 + component`; __run selects one
// of them from the run-time value of __comp and forwards every argument to it.
//
//   __T        - result type; also passed as the template argument of the
//                selected fetcher's __run.
//   __obj      - texture object handed to the fetcher.
//   __x, __y   - coordinates, forwarded unchanged.
//   __comp     - component index; 0, 1, 2 and 3 each select a fetcher.
//
// Returns the selected fetcher's result. For any other __comp a
// value-initialized __T is returned, so control never flows off the end of
// this non-void function (which would be undefined behavior).
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2Dgather_v2")> > {
  template <class __T>
  __attribute__((device)) static __T __run(cudaTextureObject_t __obj, float __x, float __y,
                              int __comp) {
    switch (__comp) {
    case 0:
      return __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 0> >::__run<__T>(
          __obj, __x, __y, __comp);
    case 1:
      return __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 1> >::__run<__T>(
          __obj, __x, __y, __comp);
    case 2:
      return __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 2> >::__run<__T>(
          __obj, __x, __y, __comp);
    case 3:
      return __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_v2") * 100 + 3> >::__run<__T>(
          __obj, __x, __y, __comp);
    default:
      // No fetcher exists for this index; give the caller a defined
      // (all-zero) result rather than leaving the return value unspecified.
      return __T();
    }
  }
};
// "__itex2Dgather" has no body of its own: it publicly derives from the
// "__tex2Dgather_v2" specialization and therefore exposes that struct's
// static __run.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex2Dgather")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex2Dgather_v2")> > {};

// Run-time component dispatcher for the "__tex2Dgather_rmnf_v2" texture op.
//
// The per-component fetchers are separate __tex_fetch_v4 specializations keyed
// by the tag `10000 + __tex_op_hash(name) * 100 + component`; __run selects one
// of them from the run-time value of __comp and forwards every argument to it.
//
//   __T        - fetch type, passed as the template argument of the selected
//                fetcher's __run (the result is float4 regardless of __T).
//   __obj      - texture object handed to the fetcher.
//   __x, __y   - coordinates, forwarded unchanged.
//   __comp     - component index; 0, 1, 2 and 3 each select a fetcher.
//
// Returns the selected fetcher's float4. For any other __comp a
// value-initialized float4 is returned, so control never flows off the end of
// this non-void function (which would be undefined behavior).
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex2Dgather_rmnf_v2")> > {
  template <class __T>
  __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x,
                                 float __y, int __comp) {
    switch (__comp) {
    case 0:
      return __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_rmnf_v2") * 100 + 0> >::__run<__T>(
          __obj, __x, __y, __comp);
    case 1:
      return __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_rmnf_v2") * 100 + 1> >::__run<__T>(
          __obj, __x, __y, __comp);
    case 2:
      return __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_rmnf_v2") * 100 + 2> >::__run<__T>(
          __obj, __x, __y, __comp);
    case 3:
      return __tex_fetch_v4<__Tag<10000 + __tex_op_hash("__tex2Dgather_rmnf_v2") * 100 + 3> >::__run<__T>(
          __obj, __x, __y, __comp);
    default:
      // No fetcher exists for this index; give the caller a defined
      // (all-zero) result rather than leaving the return value unspecified.
      return float4();
    }
  }
};
# 527 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
// Macro-expanded fetchers for the 3D texture op.
//  - "__tex3D_v2": __run<int4|uint4|float4> each issue one
//    tex.3d.v4.{s32|u32|f32}.f32 with inputs %4 = __obj, %5 = __x, %6 = __y,
//    %7 = __z and return the four outputs (%0..%3) packed in __r.
//  - "__tex3D_rmnf_v2": the same instruction for the int4/uint4 fetch types,
//    but the result is declared and returned as float4.
// The coordinate vector in the asm has four slots; __z (%7) is written into
// both the third and the fourth slot.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex3D_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, float __z); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z) { int4 __r; asm("tex.3d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z) { uint4 __r; asm("tex.3d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, float __z) { float4 __r; asm("tex.3d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex3D_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, float __z); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z) { float4 __r; asm("tex.3d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z) { float4 __r; asm("tex.3d.v4" ".u32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z)); return __r; } };


// "__itex3D" has no body of its own: it publicly derives from the
// "__tex3D_v2" specialization and therefore exposes that struct's static
// __run.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex3D")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex3D_v2")> > {};
# 539 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
                                          ;

// Macro-expanded fetchers for the 3D texture op with explicit gradients.
//  - "__tex3DGrad_v2": __run<int4|uint4|float4> each issue one
//    tex.grad.3d.v4.{s32|u32|f32}.f32 and return the four outputs (%0..%3)
//    packed in __r.
//  - "__tex3DGrad_rmnf_v2": the same instruction for the int4/uint4 fetch
//    types, but the result is declared and returned as float4.
// Inputs: %4 = __obj, %5..%7 = __x, __y, __z, %8..%10 = __dPdx->x/y/z,
// %11..%13 = __dPdy->x/y/z. Each four-slot vector in the asm repeats its third
// operand in the fourth slot (%7, %10, %13).
// __dPdx and __dPdy are dereferenced unconditionally, so both must point to
// valid float4 objects; their .w members are never read.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex3DGrad_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy) { int4 __r; asm("tex.grad.3d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy) { uint4 __r; asm("tex.grad.3d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy) { float4 __r; asm("tex.grad.3d.v4" ".f32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex3DGrad_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy) { float4 __r; asm("tex.grad.3d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy) { float4 __r; asm("tex.grad.3d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } };


// "__itex3DGrad_v2" has no body of its own: it publicly derives from the
// "__tex3DGrad_v2" specialization and therefore exposes that struct's static
// __run.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex3DGrad_v2")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex3DGrad_v2")> > {};
# 561 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
                                                                            ;

// Macro-expanded fetchers for the 3D texture op with an explicit level.
//  - "__tex3DLod_v2": __run<int4|uint4|float4> each issue one
//    tex.level.3d.v4.{s32|u32|f32}.f32 and return the four outputs (%0..%3)
//    packed in __r.
//  - "__tex3DLod_rmnf_v2": the same instruction for the int4/uint4 fetch
//    types, but the result is declared and returned as float4.
// Inputs: %4 = __obj, %5..%7 = __x, __y, __z (with %7 repeated in the fourth
// coordinate slot), %8 = __level.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex3DLod_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level) { int4 __r; asm("tex.level.3d.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level) { uint4 __r; asm("tex.level.3d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level) { float4 __r; asm("tex.level.3d.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__tex3DLod_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level) { float4 __r; asm("tex.level.3d.v4" ".s32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level) { float4 __r; asm("tex.level.3d.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } };


// "__itex3DLod" has no body of its own: it publicly derives from the
// "__tex3DLod_v2" specialization and therefore exposes that struct's static
// __run.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itex3DLod")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__tex3DLod_v2")> > {};
# 577 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
                                                        ;


// Macro-expanded fetchers for the cubemap texture op.
//  - "__texCubemap_v2": __run<int4|uint4|float4> each issue one
//    tex.cube.v4.{s32|u32|f32}.f32 with inputs %4 = __obj, %5 = __x,
//    %6 = __y, %7 = __z and return the four outputs (%0..%3) packed in __r.
//  - "__texCubemap_rmnf_v2": the same instruction for the int4/uint4 fetch
//    types, but the result is declared and returned as float4.
// The coordinate vector in the asm has four slots; __z (%7) is written into
// both the third and the fourth slot.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemap_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, float __z); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z) { int4 __r; asm("tex.cube.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z) { uint4 __r; asm("tex.cube.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, float __z) { float4 __r; asm("tex.cube.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemap_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, float __z); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z) { float4 __r; asm("tex.cube.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z) { float4 __r; asm("tex.cube.v4" ".u32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z)); return __r; } };


// "__itexCubemap" has no body of its own: it publicly derives from the
// "__texCubemap_v2" specialization and therefore exposes that struct's static
// __run.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itexCubemap")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemap_v2")> > {};
# 593 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
                                          ;

// Macro-expanded fetchers for the cubemap texture op with explicit gradients.
//  - "__texCubemapGrad_v2": __run<int4|uint4|float4> each issue one
//    tex.grad.cube.v4.{s32|u32|f32}.f32 and return the four outputs (%0..%3)
//    packed in __r.
//  - "__texCubemapGrad_rmnf_v2": the same instruction for the int4/uint4 fetch
//    types, but the result is declared and returned as float4.
// Inputs: %4 = __obj, %5..%7 = __x, __y, __z, %8..%10 = __dPdx->x/y/z,
// %11..%13 = __dPdy->x/y/z. Each four-slot vector in the asm repeats its third
// operand in the fourth slot (%7, %10, %13).
// __dPdx and __dPdy are dereferenced unconditionally, so both must point to
// valid float4 objects; their .w members are never read.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapGrad_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy) { int4 __r; asm("tex.grad.cube.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy) { uint4 __r; asm("tex.grad.cube.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy) { float4 __r; asm("tex.grad.cube.v4" ".f32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapGrad_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy) { float4 __r; asm("tex.grad.cube.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, const float4 *__dPdx, const float4 *__dPdy) { float4 __r; asm("tex.grad.cube.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], " "{%8, %9, %10, %10}, {%11, %12, %13, %13};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } };


// "__itexCubemapGrad_v2" has no body of its own: it publicly derives from the
// "__texCubemapGrad_v2" specialization and therefore exposes that struct's
// static __run.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itexCubemapGrad_v2")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapGrad_v2")> > {};

// Macro-expanded fetchers for the layered cubemap texture op.
//  - "__texCubemapLayered_v2": __run<int4|uint4|float4> each issue one
//    tex.acube.v4.{s32|u32|f32}.f32 and return the four outputs (%0..%3)
//    packed in __r.
//  - "__texCubemapLayered_rmnf_v2": the same instruction for the int4/uint4
//    fetch types, but the result is declared and returned as float4.
// Inputs: %4 = __obj, %5 = __layer (integer register), %6..%8 = __x, __y,
// __z. Note the asm operand order puts __layer first in the coordinate vector
// even though it is the last parameter of __run.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLayered_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer) { int4 __r; asm("tex.acube.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer) { uint4 __r; asm("tex.acube.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer) { float4 __r; asm("tex.acube.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLayered_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer) { float4 __r; asm("tex.acube.v4" ".s32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer) { float4 __r; asm("tex.acube.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}];" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z)); return __r; } };


// "__itexCubemapLayered" has no body of its own: it publicly derives from the
// "__texCubemapLayered_v2" specialization and therefore exposes that struct's
// static __run.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itexCubemapLayered")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLayered_v2")> > {};

// Macro-expanded fetchers for the layered cubemap texture op with explicit
// gradients.
//  - "__texCubemapLayeredGrad_v2": __run<int4|uint4|float4> each issue one
//    tex.grad.acube.v4.{s32|u32|f32}.f32 and return the four outputs (%0..%3)
//    packed in __r.
//  - "__texCubemapLayeredGrad_rmnf_v2": the same instruction for the
//    int4/uint4 fetch types, but the result is declared and returned as float4.
// Inputs: %4 = __obj, %5 = __layer (integer register), %6..%8 = __x, __y,
// __z, %9..%11 = __dPdx->x/y/z, %12..%14 = __dPdy->x/y/z. The two gradient
// vectors repeat their third operand in the fourth slot (%11, %14).
// __dPdx and __dPdy are dereferenced unconditionally, so both must point to
// valid float4 objects; their .w members are never read.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLayeredGrad_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, const float4 *__dPdx, const float4 *__dPdy); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, const float4 *__dPdx, const float4 *__dPdy) { int4 __r; asm("tex.grad.acube.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], " "{%9, %10, %11, %11}, {%12, %13, %14, %14};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, const float4 *__dPdx, const float4 *__dPdy) { uint4 __r; asm("tex.grad.acube.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], " "{%9, %10, %11, %11}, {%12, %13, %14, %14};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, const float4 *__dPdx, const float4 *__dPdy) { float4 __r; asm("tex.grad.acube.v4" ".f32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], " "{%9, %10, %11, %11}, {%12, %13, %14, %14};" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLayeredGrad_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, const float4 *__dPdx, const float4 *__dPdy); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, const float4 *__dPdx, const float4 *__dPdy) { float4 __r; asm("tex.grad.acube.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], " "{%9, %10, %11, %11}, {%12, %13, %14, %14};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, const float4 *__dPdx, const float4 *__dPdy) { float4 __r; asm("tex.grad.acube.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], " "{%9, %10, %11, %11}, {%12, %13, %14, %14};" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__dPdx->x), "f"(__dPdx->y), "f"(__dPdx->z), "f"(__dPdy->x), "f"(__dPdy->y), "f"(__dPdy->z)); return __r; } };
# 620 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
// "__itexCubemapLayeredGrad_v2" has no body of its own: it publicly derives
// from the "__texCubemapLayeredGrad_v2" specialization and therefore exposes
// that struct's static __run.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itexCubemapLayeredGrad_v2")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLayeredGrad_v2")> > {};

// Macro-expanded fetchers for the layered cubemap texture op with an explicit
// level.
//  - "__texCubemapLayeredLod_v2": __run<int4|uint4|float4> each issue one
//    tex.level.acube.v4.{s32|u32|f32}.f32 and return the four outputs
//    (%0..%3) packed in __r.
//  - "__texCubemapLayeredLod_rmnf_v2": the same instruction for the int4/uint4
//    fetch types, but the result is declared and returned as float4.
// Inputs: %4 = __obj, %5 = __layer (integer register), %6..%8 = __x, __y,
// __z, %9 = __level.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLayeredLod_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, float __level); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, float __level) { int4 __r; asm("tex.level.acube.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, float __level) { uint4 __r; asm("tex.level.acube.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, float __level) { float4 __r; asm("tex.level.acube.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLayeredLod_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, float __level); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, float __level) { float4 __r; asm("tex.level.acube.v4" ".s32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, int __layer, float __level) { float4 __r; asm("tex.level.acube.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %8}], %9;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "r"(__layer), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } };


// "__itexCubemapLayeredLod" has no body of its own: it publicly derives from
// the "__texCubemapLayeredLod_v2" specialization and therefore exposes that
// struct's static __run.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itexCubemapLayeredLod")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLayeredLod_v2")> > {};

// Macro-expanded fetchers for the cubemap texture op with an explicit level.
//  - "__texCubemapLod_v2": __run<int4|uint4|float4> each issue one
//    tex.level.cube.v4.{s32|u32|f32}.f32 and return the four outputs (%0..%3)
//    packed in __r.
//  - "__texCubemapLod_rmnf_v2": the same instruction for the int4/uint4 fetch
//    types, but the result is declared and returned as float4.
// Inputs: %4 = __obj, %5..%7 = __x, __y, __z (with %7 repeated in the fourth
// coordinate slot), %8 = __level.
template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLod_v2")> > { template <class T> __attribute__((device)) static T __run(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level); template <> __attribute__((device)) int4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level) { int4 __r; asm("tex.level.cube.v4" ".s32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } template <> __attribute__((device)) uint4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level) { uint4 __r; asm("tex.level.cube.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<float4>(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level) { float4 __r; asm("tex.level.cube.v4" ".f32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "f"(__r.x), "=" "f"(__r.y), "=" "f"(__r.z), "=" "f"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } }; template <> struct __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLod_rmnf_v2")> > { template <class T> __attribute__((device)) static float4 __run(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level); template <> __attribute__((device)) float4 __run<int4>(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level) { float4 __r; asm("tex.level.cube.v4" ".s32." 
"f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } template <> __attribute__((device)) float4 __run<uint4>(cudaTextureObject_t __obj, float __x, float __y, float __z, float __level) { float4 __r; asm("tex.level.cube.v4" ".u32." "f32" "\t" "{%0, %1, %2, %3}, [%4, {%5, %6, %7, %7}], %8;" : "=" "r"(__r.x), "=" "r"(__r.y), "=" "r"(__r.z), "=" "r"(__r.w) : "l"(__obj), "f"(__x), "f"(__y), "f"(__z), "f"(__level)); return __r; } };


// "__itexCubemapLod" has no body of its own: it publicly derives from the
// "__texCubemapLod_v2" specialization and therefore exposes that struct's
// static __run.
template <>
struct __tex_fetch_v4<__Tag<__tex_op_hash("__itexCubemapLod")> >
    : __tex_fetch_v4<__Tag<__tex_op_hash("__texCubemapLod_v2")> > {};


// Builds a __DestT from the leading members of a __SrcT vector.
//
// __run's template argument is the number of members to copy. By default it is
// sizeof(__DestT) divided by the size of the base type that __TypeInfoT
// associates with __DestT; explicit specializations exist for 1 to 4 members
// and copy .x, then .y, .z and .w in that order via brace initialization.
template <class __DestT, class __SrcT>
struct __convert {
  // Declaration only; every usable count is one of the specializations below.
  template <int __Count =
                sizeof(__DestT) / sizeof(typename __TypeInfoT<__DestT>::__base_t)>
  __attribute__((device)) static __DestT __run(__SrcT __src);

  // One member: x.
  template <>
  __attribute__((device)) static __DestT __run<1>(__SrcT __src) {
    return {__src.x};
  }

  // Two members: x, y.
  template <>
  __attribute__((device)) static __DestT __run<2>(__SrcT __src) {
    return {__src.x, __src.y};
  }

  // Three members: x, y, z.
  template <>
  __attribute__((device)) static __DestT __run<3>(__SrcT __src) {
    return {__src.x, __src.y, __src.z};
  }

  // Four members: x, y, z, w.
  template <>
  __attribute__((device)) static __DestT __run<4>(__SrcT __src) {
    return {__src.x, __src.y, __src.z, __src.w};
  }
};
# 661 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_texture_intrinsics.h" 3
template <class __op, class __T, class... __Args>
__attribute__((device)) static void __tex_fetch(__T *__ptr, cudaTextureObject_t __handle,
                                   __Args... __args) {
  using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
  *__ptr = __convert<__T, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(__handle, __args...));
}


// Produces a cudaTextureObject_t from an arbitrary handle value.
// A single `mov.b64` copies __handle (input, "l" constraint) into the result
// (output, "=l" constraint); nothing else is done to the value.
template <class __T>
__attribute__((device)) cudaTextureObject_t __tex_handle_to_obj(__T __handle) {
  cudaTextureObject_t __as_object;
  asm("mov.b64 %0, %1; " : "=l"(__as_object) : "l"(__handle));
  return __as_object;
}


template <class __op, class __T, class __HandleT, class... __Args>
__attribute__((device)) static void __tex_fetch(__T *__ptr, __HandleT __handle,
                                   __Args... __args) {
  using __FetchT = typename __TypeInfoT<__T>::__fetch_t;
  *__ptr = __convert<__T, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}


template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__attribute__((device)) static void
__tex_fetch(__DataT *, __RetT *__ptr,
            texture<__DataT, __TexT, cudaReadModeNormalizedFloat> __handle,
            __Args... __args) {
  using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t;
  *__ptr = __convert<__RetT, float4>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}


template <class __op, class __DataT, class __RetT, int __TexT, class... __Args>
__attribute__((device)) static void
__tex_fetch(__DataT *, __RetT *__ptr,
            texture<__DataT, __TexT, cudaReadModeElementType> __handle,
            __Args... __args) {
  using __FetchT = typename __TypeInfoT<__DataT>::__fetch_t;
  *__ptr = __convert<__RetT, __FetchT>::__run(
      __tex_fetch_v4<__op>::template __run<__FetchT>(
          __tex_handle_to_obj(__handle), __args...));
}
}
}
# 366 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 387 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/local/cuda-11.7/include/texture_fetch_functions.h" 1 3
# 72 "/usr/local/cuda-11.7/include/texture_fetch_functions.h" 3
// Return-type trait for the cudaReadModeElementType texture-reference
// functions. Each specialization maps the element type to itself; the primary
// template has no `type` member, so unlisted element types yield no return type.
template <typename T>
struct __nv_tex_rmet_ret { };

template<> struct __nv_tex_rmet_ret<char> { using type = char; };
template<> struct __nv_tex_rmet_ret<signed char> { using type = signed char; };
template<> struct __nv_tex_rmet_ret<unsigned char> { using type = unsigned char; };
template<> struct __nv_tex_rmet_ret<char1> { using type = char1; };
template<> struct __nv_tex_rmet_ret<uchar1> { using type = uchar1; };
template<> struct __nv_tex_rmet_ret<char2> { using type = char2; };
template<> struct __nv_tex_rmet_ret<uchar2> { using type = uchar2; };
template<> struct __nv_tex_rmet_ret<char4> { using type = char4; };
template<> struct __nv_tex_rmet_ret<uchar4> { using type = uchar4; };

template<> struct __nv_tex_rmet_ret<short> { using type = short; };
template<> struct __nv_tex_rmet_ret<unsigned short> { using type = unsigned short; };
template<> struct __nv_tex_rmet_ret<short1> { using type = short1; };
template<> struct __nv_tex_rmet_ret<ushort1> { using type = ushort1; };
template<> struct __nv_tex_rmet_ret<short2> { using type = short2; };
template<> struct __nv_tex_rmet_ret<ushort2> { using type = ushort2; };
template<> struct __nv_tex_rmet_ret<short4> { using type = short4; };
template<> struct __nv_tex_rmet_ret<ushort4> { using type = ushort4; };

template<> struct __nv_tex_rmet_ret<int> { using type = int; };
template<> struct __nv_tex_rmet_ret<unsigned int> { using type = unsigned int; };
template<> struct __nv_tex_rmet_ret<int1> { using type = int1; };
template<> struct __nv_tex_rmet_ret<uint1> { using type = uint1; };
template<> struct __nv_tex_rmet_ret<int2> { using type = int2; };
template<> struct __nv_tex_rmet_ret<uint2> { using type = uint2; };
template<> struct __nv_tex_rmet_ret<int4> { using type = int4; };
template<> struct __nv_tex_rmet_ret<uint4> { using type = uint4; };
# 113 "/usr/local/cuda-11.7/include/texture_fetch_functions.h" 3
template<> struct __nv_tex_rmet_ret<float> { using type = float; };
template<> struct __nv_tex_rmet_ret<float1> { using type = float1; };
template<> struct __nv_tex_rmet_ret<float2> { using type = float2; };
template<> struct __nv_tex_rmet_ret<float4> { using type = float4; };


// Names the pointer type (T*) that the element-type fetch wrappers cast their
// output address to before calling __tex_fetch.
template <typename T>
struct __nv_tex_rmet_cast {
  using type = T*;
};
# 131 "/usr/local/cuda-11.7/include/texture_fetch_functions.h" 3
// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex1Dfetch_v2" fetch on `t` with (x) and returns the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex1Dfetch(texture<T, 0x01, cudaReadModeElementType> t, int x)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1Dfetch_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x);
  return result;
}

// Return-type trait for the cudaReadModeNormalizedFloat texture-reference
// functions: each listed 8/16-bit integer element type maps to the float
// type with the same component count. The primary template has no `type`.
template <typename T>
struct __nv_tex_rmnf_ret { };

template <> struct __nv_tex_rmnf_ret<char> { using type = float; };
template <> struct __nv_tex_rmnf_ret<signed char> { using type = float; };
template <> struct __nv_tex_rmnf_ret<unsigned char> { using type = float; };
template <> struct __nv_tex_rmnf_ret<short> { using type = float; };
template <> struct __nv_tex_rmnf_ret<unsigned short> { using type = float; };
template <> struct __nv_tex_rmnf_ret<char1> { using type = float1; };
template <> struct __nv_tex_rmnf_ret<uchar1> { using type = float1; };
template <> struct __nv_tex_rmnf_ret<short1> { using type = float1; };
template <> struct __nv_tex_rmnf_ret<ushort1> { using type = float1; };
template <> struct __nv_tex_rmnf_ret<char2> { using type = float2; };
template <> struct __nv_tex_rmnf_ret<uchar2> { using type = float2; };
template <> struct __nv_tex_rmnf_ret<short2> { using type = float2; };
template <> struct __nv_tex_rmnf_ret<ushort2> { using type = float2; };
template <> struct __nv_tex_rmnf_ret<char4> { using type = float4; };
template <> struct __nv_tex_rmnf_ret<uchar4> { using type = float4; };
template <> struct __nv_tex_rmnf_ret<short4> { using type = float4; };
template <> struct __nv_tex_rmnf_ret<ushort4> { using type = float4; };

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex1Dfetch_rmnf_v2" fetch on `t` with (x) and returns the
// __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex1Dfetch(texture<T, 0x01, cudaReadModeNormalizedFloat> t, int x)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1Dfetch_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex1D_v2" fetch on `t` with (x) and returns the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex1D(texture<T, 0x01, cudaReadModeElementType> t, float x)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1D_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex1D_rmnf_v2" fetch on `t` with (x) and returns the
// __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex1D(texture<T, 0x01, cudaReadModeNormalizedFloat> t, float x)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1D_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex2D_v2" fetch on `t` with (x, y) and returns the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex2D(texture<T, 0x02, cudaReadModeElementType> t, float x, float y)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2D_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex2D_rmnf_v2" fetch on `t` with (x, y) and returns the
// __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex2D(texture<T, 0x02, cudaReadModeNormalizedFloat> t, float x, float y)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2D_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex1DLayered_v2" fetch on `t` with (x, layer) and returns the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex1DLayered(texture<T, 0xF1, cudaReadModeElementType> t, float x, int layer)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1DLayered_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, layer);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex1DLayered_rmnf_v2" fetch on `t` with (x, layer) and returns the
// __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex1DLayered(texture<T, 0xF1, cudaReadModeNormalizedFloat> t, float x, int layer)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1DLayered_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, layer);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex2DLayered_v2" fetch on `t` with (x, y, layer) and returns the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex2DLayered(texture<T, 0xF2, cudaReadModeElementType> t, float x, float y, int layer)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2DLayered_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, layer);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex2DLayered_rmnf_v2" fetch on `t` with (x, y, layer) and returns the
// __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex2DLayered(texture<T, 0xF2, cudaReadModeNormalizedFloat> t, float x, float y, int layer)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2DLayered_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, layer);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex3D_v2" fetch on `t` with (x, y, z) and returns the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex3D(texture<T, 0x03, cudaReadModeElementType> t, float x, float y, float z)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex3D_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, z);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex3D_rmnf_v2" fetch on `t` with (x, y, z) and returns the
// __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex3D(texture<T, 0x03, cudaReadModeNormalizedFloat> t, float x, float y, float z)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex3D_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, z);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__texCubemap_v2" fetch on `t` with (x, y, z) and returns the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
texCubemap(texture<T, 0x0C, cudaReadModeElementType> t, float x, float y, float z)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemap_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, z);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__texCubemap_rmnf_v2" fetch on `t` with (x, y, z) and returns the
// __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
texCubemap(texture<T, 0x0C, cudaReadModeNormalizedFloat> t, float x, float y, float z)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemap_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, z);
  return result;
}


// Return-type trait for the cudaReadModeElementType tex2Dgather overload:
// every listed element type maps to the 4-component vector of its base type
// (char4, uchar4, short4, ushort4, int4, uint4 or float4). The primary
// template has no `type` member.
template <typename T>
struct __nv_tex2dgather_ret { };
template <> struct __nv_tex2dgather_ret<char> { using type = char4; };
template <> struct __nv_tex2dgather_ret<signed char> { using type = char4; };
template <> struct __nv_tex2dgather_ret<char1> { using type = char4; };
template <> struct __nv_tex2dgather_ret<char2> { using type = char4; };
template <> struct __nv_tex2dgather_ret<char3> { using type = char4; };
template <> struct __nv_tex2dgather_ret<char4> { using type = char4; };
template <> struct __nv_tex2dgather_ret<unsigned char> { using type = uchar4; };
template <> struct __nv_tex2dgather_ret<uchar1> { using type = uchar4; };
template <> struct __nv_tex2dgather_ret<uchar2> { using type = uchar4; };
template <> struct __nv_tex2dgather_ret<uchar3> { using type = uchar4; };
template <> struct __nv_tex2dgather_ret<uchar4> { using type = uchar4; };

template <> struct __nv_tex2dgather_ret<short> { using type = short4; };
template <> struct __nv_tex2dgather_ret<short1> { using type = short4; };
template <> struct __nv_tex2dgather_ret<short2> { using type = short4; };
template <> struct __nv_tex2dgather_ret<short3> { using type = short4; };
template <> struct __nv_tex2dgather_ret<short4> { using type = short4; };
template <> struct __nv_tex2dgather_ret<unsigned short> { using type = ushort4; };
template <> struct __nv_tex2dgather_ret<ushort1> { using type = ushort4; };
template <> struct __nv_tex2dgather_ret<ushort2> { using type = ushort4; };
template <> struct __nv_tex2dgather_ret<ushort3> { using type = ushort4; };
template <> struct __nv_tex2dgather_ret<ushort4> { using type = ushort4; };

template <> struct __nv_tex2dgather_ret<int> { using type = int4; };
template <> struct __nv_tex2dgather_ret<int1> { using type = int4; };
template <> struct __nv_tex2dgather_ret<int2> { using type = int4; };
template <> struct __nv_tex2dgather_ret<int3> { using type = int4; };
template <> struct __nv_tex2dgather_ret<int4> { using type = int4; };
template <> struct __nv_tex2dgather_ret<unsigned int> { using type = uint4; };
template <> struct __nv_tex2dgather_ret<uint1> { using type = uint4; };
template <> struct __nv_tex2dgather_ret<uint2> { using type = uint4; };
template <> struct __nv_tex2dgather_ret<uint3> { using type = uint4; };
template <> struct __nv_tex2dgather_ret<uint4> { using type = uint4; };

template <> struct __nv_tex2dgather_ret<float> { using type = float4; };
template <> struct __nv_tex2dgather_ret<float1> { using type = float4; };
template <> struct __nv_tex2dgather_ret<float2> { using type = float4; };
template <> struct __nv_tex2dgather_ret<float3> { using type = float4; };
template <> struct __nv_tex2dgather_ret<float4> { using type = float4; };

// Texture-reference gather (cudaReadModeElementType): runs the
// "__tex2Dgather_v2" fetch on `t` with (x, y, comp) and returns the
// __nv_tex2dgather_ret<T>::type result. `comp` defaults to 0.
template <typename T>
static __attribute__((device)) __inline__ __attribute__((always_inline))
typename __nv_tex2dgather_ret<T>::type
tex2Dgather(texture<T, 0x02, cudaReadModeElementType> t, float x, float y, int comp=0)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2Dgather_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex2dgather_ret<T>::type gathered;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &gathered, t, x, y, comp);
  return gathered;
}


// Return-type trait for the cudaReadModeNormalizedFloat tex2Dgather overload:
// every listed 8/16-bit integer element type maps to float4. The primary
// template has no `type` member.
template<typename T> struct __nv_tex2dgather_rmnf_ret { };
template<> struct __nv_tex2dgather_rmnf_ret<char> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<signed char> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<unsigned char> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<char1> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<uchar1> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<char2> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<uchar2> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<char3> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<uchar3> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<char4> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<uchar4> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<signed short> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<unsigned short> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<short1> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<ushort1> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<short2> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<ushort2> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<short3> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<ushort3> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<short4> { using type = float4; };
template<> struct __nv_tex2dgather_rmnf_ret<ushort4> { using type = float4; };

// Texture-reference gather (cudaReadModeNormalizedFloat): runs the
// "__tex2Dgather_rmnf_v2" fetch on `t` with (x, y, comp) and returns the
// __nv_tex2dgather_rmnf_ret<T>::type result. `comp` defaults to 0.
template <typename T>
static __attribute__((device)) __inline__ __attribute__((always_inline))
typename __nv_tex2dgather_rmnf_ret<T>::type
tex2Dgather(texture<T, 0x02, cudaReadModeNormalizedFloat> t, float x, float y, int comp = 0)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2Dgather_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex2dgather_rmnf_ret<T>::type gathered;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &gathered, t, x, y, comp);
  return gathered;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex1DLod_v2" fetch on `t` with (x, level) and returns the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex1DLod(texture<T, 0x01, cudaReadModeElementType> t, float x, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1DLod_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, level);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex1DLod_rmnf_v2" fetch on `t` with (x, level) and returns the
// __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex1DLod(texture<T, 0x01, cudaReadModeNormalizedFloat> t, float x, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1DLod_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, level);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex2DLod_v2" fetch on `t` with (x, y, level) and returns the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex2DLod(texture<T, 0x02, cudaReadModeElementType> t, float x, float y, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2DLod_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, level);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex2DLod_rmnf_v2" fetch on `t` with (x, y, level) and returns the
// __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex2DLod(texture<T, 0x02, cudaReadModeNormalizedFloat> t, float x, float y, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2DLod_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, level);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex1DLayeredLod_v2" fetch on `t` with (x, layer, level) and returns the
// fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex1DLayeredLod(texture<T, 0xF1, cudaReadModeElementType> t, float x, int layer, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1DLayeredLod_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, layer, level);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex1DLayeredLod_rmnf_v2" fetch on `t` with (x, layer, level) and returns
// the __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex1DLayeredLod(texture<T, 0xF1, cudaReadModeNormalizedFloat> t, float x, int layer, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1DLayeredLod_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, layer, level);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex2DLayeredLod_v2" fetch on `t` with (x, y, layer, level) and returns
// the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex2DLayeredLod(texture<T, 0xF2, cudaReadModeElementType> t, float x, float y, int layer, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2DLayeredLod_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, layer, level);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex2DLayeredLod_rmnf_v2" fetch on `t` with (x, y, layer, level) and
// returns the __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex2DLayeredLod(texture<T, 0xF2, cudaReadModeNormalizedFloat> t, float x, float y, int layer, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2DLayeredLod_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, layer, level);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex3DLod_v2" fetch on `t` with (x, y, z, level) and returns the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex3DLod(texture<T, 0x03, cudaReadModeElementType> t, float x, float y, float z, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex3DLod_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, z, level);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex3DLod_rmnf_v2" fetch on `t` with (x, y, z, level) and returns the
// __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex3DLod(texture<T, 0x03, cudaReadModeNormalizedFloat> t, float x, float y, float z, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex3DLod_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, z, level);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__texCubemapLod_v2" fetch on `t` with (x, y, z, level) and returns the
// fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
texCubemapLod(texture<T, 0x0C, cudaReadModeElementType> t, float x, float y, float z, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemapLod_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, z, level);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__texCubemapLod_rmnf_v2" fetch on `t` with (x, y, z, level) and returns
// the __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
texCubemapLod(texture<T, 0x0C, cudaReadModeNormalizedFloat> t, float x, float y, float z, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemapLod_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, z, level);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__texCubemapLayered_v2" fetch on `t` with (x, y, z, layer) and returns the
// fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
texCubemapLayered(texture<T, 0xFC, cudaReadModeElementType> t, float x, float y, float z, int layer)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemapLayered_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, z, layer);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__texCubemapLayered_rmnf_v2" fetch on `t` with (x, y, z, layer) and returns
// the __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
texCubemapLayered(texture<T, 0xFC, cudaReadModeNormalizedFloat> t, float x, float y, float z, int layer)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemapLayered_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, z, layer);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__texCubemapLayeredLod_v2" fetch on `t` with (x, y, z, layer, level) and
// returns the fetched value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
texCubemapLayeredLod(texture<T, 0xFC, cudaReadModeElementType> t, float x, float y, float z, int layer, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemapLayeredLod_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, z, layer, level);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__texCubemapLayeredLod_rmnf_v2" fetch on `t` with (x, y, z, layer, level)
// and returns the __nv_tex_rmnf_ret<T>::type result.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
texCubemapLayeredLod(texture<T, 0xFC, cudaReadModeNormalizedFloat> t, float x, float y, float z, int layer, float level)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemapLayeredLod_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, z, layer, level);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__texCubemapGrad_v2" fetch on `t` with (x, y, z, &dPdx, &dPdy) and returns
// the fetched value. The two float4 arguments are forwarded by address.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
texCubemapGrad(texture<T, 0x0C, cudaReadModeElementType> t, float x, float y, float z, float4 dPdx, float4 dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemapGrad_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, z, &dPdx, &dPdy);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__texCubemapGrad_rmnf_v2" fetch on `t` with (x, y, z, &dPdx, &dPdy) and
// returns the __nv_tex_rmnf_ret<T>::type result. The two float4 arguments are
// forwarded by address.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
texCubemapGrad(texture<T, 0x0C, cudaReadModeNormalizedFloat> t, float x, float y, float z, float4 dPdx, float4 dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemapGrad_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, z, &dPdx, &dPdy);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__texCubemapLayeredGrad_v2" fetch on `t` with (x, y, z, layer, &dPdx, &dPdy)
// and returns the fetched value. The two float4 arguments are forwarded by
// address.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
texCubemapLayeredGrad(texture<T, 0xFC, cudaReadModeElementType> t, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemapLayeredGrad_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, z, layer, &dPdx, &dPdy);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__texCubemapLayeredGrad_rmnf_v2" fetch on `t` with
// (x, y, z, layer, &dPdx, &dPdy) and returns the __nv_tex_rmnf_ret<T>::type
// result. The two float4 arguments are forwarded by address.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
texCubemapLayeredGrad(texture<T, 0xFC, cudaReadModeNormalizedFloat> t, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__texCubemapLayeredGrad_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, z, layer, &dPdx, &dPdy);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex1DGrad_v2" fetch on `t` with (x, dPdx, dPdy) and returns the fetched
// value. Here dPdx and dPdy are plain floats passed by value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex1DGrad(texture<T, 0x01, cudaReadModeElementType> t, float x, float dPdx, float dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1DGrad_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, dPdx, dPdy);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex1DGrad_rmnf_v2" fetch on `t` with (x, dPdx, dPdy) and returns the
// __nv_tex_rmnf_ret<T>::type result. dPdx and dPdy are plain floats passed by
// value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex1DGrad(texture<T, 0x01, cudaReadModeNormalizedFloat> t, float x, float dPdx, float dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1DGrad_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, dPdx, dPdy);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex2DGrad_v2" fetch on `t` with (x, y, &dPdx, &dPdy) and returns the
// fetched value. The two float2 arguments are forwarded by address.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex2DGrad(texture<T, 0x02, cudaReadModeElementType> t, float x, float y, float2 dPdx, float2 dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2DGrad_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, &dPdx, &dPdy);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex2DGrad_rmnf_v2" fetch on `t` with (x, y, &dPdx, &dPdy) and returns the
// __nv_tex_rmnf_ret<T>::type result. The two float2 arguments are forwarded by
// address.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex2DGrad(texture<T, 0x02, cudaReadModeNormalizedFloat> t, float x, float y, float2 dPdx, float2 dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2DGrad_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, &dPdx, &dPdy);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex1DLayeredGrad_v2" fetch on `t` with (x, layer, dPdx, dPdy) and returns
// the fetched value. dPdx and dPdy are plain floats passed by value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex1DLayeredGrad(texture<T, 0xF1, cudaReadModeElementType> t, float x, int layer, float dPdx, float dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1DLayeredGrad_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, layer, dPdx, dPdy);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex1DLayeredGrad_rmnf_v2" fetch on `t` with (x, layer, dPdx, dPdy) and
// returns the __nv_tex_rmnf_ret<T>::type result. dPdx and dPdy are plain
// floats passed by value.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex1DLayeredGrad(texture<T, 0xF1, cudaReadModeNormalizedFloat> t, float x, int layer, float dPdx, float dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex1DLayeredGrad_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, layer, dPdx, dPdy);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex2DLayeredGrad_v2" fetch on `t` with (x, y, layer, &dPdx, &dPdy) and
// returns the fetched value. The two float2 arguments are forwarded by address.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex2DLayeredGrad(texture<T, 0xF2, cudaReadModeElementType> t, float x, float y, int layer, float2 dPdx, float2 dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2DLayeredGrad_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, layer, &dPdx, &dPdy);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex2DLayeredGrad_rmnf_v2" fetch on `t` with (x, y, layer, &dPdx, &dPdy)
// and returns the __nv_tex_rmnf_ret<T>::type result. The two float2 arguments
// are forwarded by address.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex2DLayeredGrad(texture<T, 0xF2, cudaReadModeNormalizedFloat> t, float x, float y, int layer, float2 dPdx, float2 dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex2DLayeredGrad_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, layer, &dPdx, &dPdy);
  return result;
}


// Deprecated texture-reference overload (cudaReadModeElementType): runs the
// "__tex3DGrad_v2" fetch on `t` with (x, y, z, &dPdx, &dPdy) and returns the
// fetched value. The two float4 arguments are forwarded by address.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmet_ret<T>::type
tex3DGrad(texture<T, 0x03, cudaReadModeElementType> t, float x, float y, float z, float4 dPdx, float4 dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex3DGrad_v2")>;
  using out_ptr_t = typename __nv_tex_rmet_cast<T>::type;

  typename __nv_tex_rmet_ret<T>::type result;
  out_ptr_t dst = (out_ptr_t)&result;
  ::__cuda_tex::__tex_fetch<op_tag>(dst, t, x, y, z, &dPdx, &dPdy);
  return result;
}

// Deprecated texture-reference overload (cudaReadModeNormalizedFloat): runs the
// "__tex3DGrad_rmnf_v2" fetch on `t` with (x, y, z, &dPdx, &dPdy) and returns
// the __nv_tex_rmnf_ret<T>::type result. The two float4 arguments are
// forwarded by address.
template <typename T>
static __attribute__((deprecated)) __inline__ __attribute__((always_inline)) __attribute__((device))
typename __nv_tex_rmnf_ret<T>::type
tex3DGrad(texture<T, 0x03, cudaReadModeNormalizedFloat> t, float x, float y, float z, float4 dPdx, float4 dPdy)
{
  using op_tag = ::__cuda_tex::__Tag< ::__cuda_tex::__tex_op_hash("__tex3DGrad_rmnf_v2")>;

  T elem_type_tag;  // value unused: only its address is passed, as the T* first argument
  typename __nv_tex_rmnf_ret<T>::type result;
  ::__cuda_tex::__tex_fetch<op_tag>(&elem_type_tag, &result, t, x, y, z, &dPdx, &dPdy);
  return result;
}
# 388 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 1 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 1 3
# 64 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
// Trait used as the return type of the pointer-output texture-object fetch
// functions. `type` is void for every listed element type; the primary
// template has no `type`, so substituting an unlisted T into
// `typename __nv_itex_trait<T>::type` fails.
template <typename T> struct __nv_itex_trait { };
template<> struct __nv_itex_trait<char> { using type = void; };
template<> struct __nv_itex_trait<signed char> { using type = void; };
template<> struct __nv_itex_trait<char1> { using type = void; };
template<> struct __nv_itex_trait<char2> { using type = void; };
template<> struct __nv_itex_trait<char4> { using type = void; };
template<> struct __nv_itex_trait<unsigned char> { using type = void; };
template<> struct __nv_itex_trait<uchar1> { using type = void; };
template<> struct __nv_itex_trait<uchar2> { using type = void; };
template<> struct __nv_itex_trait<uchar4> { using type = void; };
template<> struct __nv_itex_trait<short> { using type = void; };
template<> struct __nv_itex_trait<short1> { using type = void; };
template<> struct __nv_itex_trait<short2> { using type = void; };
template<> struct __nv_itex_trait<short4> { using type = void; };
template<> struct __nv_itex_trait<unsigned short> { using type = void; };
template<> struct __nv_itex_trait<ushort1> { using type = void; };
template<> struct __nv_itex_trait<ushort2> { using type = void; };
template<> struct __nv_itex_trait<ushort4> { using type = void; };
template<> struct __nv_itex_trait<int> { using type = void; };
template<> struct __nv_itex_trait<int1> { using type = void; };
template<> struct __nv_itex_trait<int2> { using type = void; };
template<> struct __nv_itex_trait<int4> { using type = void; };
template<> struct __nv_itex_trait<unsigned int> { using type = void; };
template<> struct __nv_itex_trait<uint1> { using type = void; };
template<> struct __nv_itex_trait<uint2> { using type = void; };
template<> struct __nv_itex_trait<uint4> { using type = void; };
# 100 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
template<> struct __nv_itex_trait<float> { using type = void; };
template<> struct __nv_itex_trait<float1> { using type = void; };
template<> struct __nv_itex_trait<float2> { using type = void; };
template<> struct __nv_itex_trait<float4> { using type = void; };


// tex1Dfetch, destination-pointer form: forwards (ptr, obj, x) to the
// "__itex1Dfetch" texture operation. Only available for element types that
// have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex1Dfetch(T *ptr, cudaTextureObject_t obj, int x)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex1Dfetch")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x);
}

// tex1Dfetch, value-returning form: runs the destination-pointer overload on
// a local T and returns that local.
template <class T>
static __attribute__((device)) T tex1Dfetch(cudaTextureObject_t texObject, int x)
{
  T fetched;
  tex1Dfetch(&fetched, texObject, x);
  return fetched;
}

// tex1D, destination-pointer form: forwards (ptr, obj, x) to the "__itex1D"
// texture operation. Only available for element types that have an
// __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex1D(T *ptr, cudaTextureObject_t obj, float x)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex1D")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x);
}

// tex1D, value-returning form: runs the destination-pointer overload on a
// local T and returns that local.
template <class T>
static __attribute__((device)) T tex1D(cudaTextureObject_t texObject, float x)
{
  T fetched;
  tex1D(&fetched, texObject, x);
  return fetched;
}


// tex2D, destination-pointer form: forwards (ptr, obj, x, y) to the
// "__itex2D" texture operation. Only available for element types that have
// an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex2D(T *ptr, cudaTextureObject_t obj, float x, float y)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex2D")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y);
}

// tex2D, value-returning form: runs the destination-pointer overload on a
// local T and returns that local.
template <class T>
static __attribute__((device)) T tex2D(cudaTextureObject_t texObject, float x, float y)
{
  T fetched;
  tex2D(&fetched, texObject, x, y);
  return fetched;
}
# 188 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
// tex3D, destination-pointer form: forwards (ptr, obj, x, y, z) to the
// "__itex3D" texture operation. Only available for element types that have
// an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex3D(T *ptr, cudaTextureObject_t obj, float x, float y, float z)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex3D")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, z);
}

// tex3D, value-returning form: runs the destination-pointer overload on a
// local T and returns that local.
template <class T>
static __attribute__((device)) T tex3D(cudaTextureObject_t texObject, float x, float y, float z)
{
  T fetched;
  tex3D(&fetched, texObject, x, y, z);
  return fetched;
}
# 230 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
// tex1DLayered, destination-pointer form: forwards (ptr, obj, x, layer) to
// the "__itex1DLayered" texture operation. Only available for element types
// that have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex1DLayered(T *ptr, cudaTextureObject_t obj, float x, int layer)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex1DLayered")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, layer);
}

// tex1DLayered, value-returning form: runs the destination-pointer overload
// on a local T and returns that local.
template <class T>
static __attribute__((device)) T tex1DLayered(cudaTextureObject_t texObject, float x, int layer)
{
  T fetched;
  tex1DLayered(&fetched, texObject, x, layer);
  return fetched;
}

// tex2DLayered, destination-pointer form: forwards (ptr, obj, x, y, layer) to
// the "__itex2DLayered" texture operation. Only available for element types
// that have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex2DLayered(T *ptr, cudaTextureObject_t obj, float x, float y, int layer)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex2DLayered")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, layer);
}

// tex2DLayered, value-returning form: runs the destination-pointer overload
// on a local T and returns that local.
template <class T>
static __attribute__((device)) T tex2DLayered(cudaTextureObject_t texObject, float x, float y, int layer)
{
  T fetched;
  tex2DLayered(&fetched, texObject, x, y, layer);
  return fetched;
}
# 289 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
// texCubemap, destination-pointer form: forwards (ptr, obj, x, y, z) to the
// "__itexCubemap" texture operation. Only available for element types that
// have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type texCubemap(T *ptr, cudaTextureObject_t obj, float x, float y, float z)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itexCubemap")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, z);
}

// texCubemap, value-returning form: runs the destination-pointer overload on
// a local T and returns that local.
template <class T>
static __attribute__((device)) T texCubemap(cudaTextureObject_t texObject, float x, float y, float z)
{
  T fetched;
  texCubemap(&fetched, texObject, x, y, z);
  return fetched;
}


// texCubemapLayered, destination-pointer form: forwards
// (ptr, obj, x, y, z, layer) to the "__itexCubemapLayered" texture operation.
// Only available for element types that have an __nv_itex_trait
// specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type texCubemapLayered(T *ptr, cudaTextureObject_t obj, float x, float y, float z, int layer)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itexCubemapLayered")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, z, layer);
}

// texCubemapLayered, value-returning form: runs the destination-pointer
// overload on a local T and returns that local.
template <class T>
static __attribute__((device)) T texCubemapLayered(cudaTextureObject_t texObject, float x, float y, float z, int layer)
{
  T fetched;
  texCubemapLayered(&fetched, texObject, x, y, z, layer);
  return fetched;
}

// tex2Dgather, destination-pointer form: forwards (ptr, obj, x, y, comp) to
// the "__itex2Dgather" texture operation; `comp` defaults to 0. Only
// available for element types that have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex2Dgather(T *ptr, cudaTextureObject_t obj, float x, float y, int comp = 0)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex2Dgather")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, comp);
}

// tex2Dgather, value-returning form: runs the destination-pointer overload on
// a local T and returns that local; `comp` defaults to 0.
template <class T>
static __attribute__((device)) T tex2Dgather(cudaTextureObject_t to, float x, float y, int comp = 0)
{
  T gathered;
  tex2Dgather(&gathered, to, x, y, comp);
  return gathered;
}
# 368 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
// tex1DLod, destination-pointer form: forwards (ptr, obj, x, level) to the
// "__itex1DLod" texture operation. Only available for element types that
// have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex1DLod(T *ptr, cudaTextureObject_t obj, float x, float level)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex1DLod")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, level);
}

// tex1DLod, value-returning form: runs the destination-pointer overload on a
// local T and returns that local.
template <class T>
static __attribute__((device)) T tex1DLod(cudaTextureObject_t texObject, float x, float level)
{
  T fetched;
  tex1DLod(&fetched, texObject, x, level);
  return fetched;
}


// tex2DLod, destination-pointer form: forwards (ptr, obj, x, y, level) to the
// "__itex2DLod" texture operation. Only available for element types that
// have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex2DLod(T *ptr, cudaTextureObject_t obj, float x, float y, float level)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex2DLod")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, level);
}

// tex2DLod, value-returning form: runs the destination-pointer overload on a
// local T and returns that local.
template <class T>
static __attribute__((device)) T tex2DLod(cudaTextureObject_t texObject, float x, float y, float level)
{
  T fetched;
  tex2DLod(&fetched, texObject, x, y, level);
  return fetched;
}
# 430 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
// tex3DLod, destination-pointer form: forwards (ptr, obj, x, y, z, level) to
// the "__itex3DLod" texture operation. Only available for element types that
// have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex3DLod(T *ptr, cudaTextureObject_t obj, float x, float y, float z, float level)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex3DLod")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, z, level);
}

// tex3DLod, value-returning form: runs the destination-pointer overload on a
// local T and returns that local.
template <class T>
static __attribute__((device)) T tex3DLod(cudaTextureObject_t texObject, float x, float y, float z, float level)
{
  T fetched;
  tex3DLod(&fetched, texObject, x, y, z, level);
  return fetched;
}
# 472 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
// tex1DLayeredLod, destination-pointer form: forwards
// (ptr, obj, x, layer, level) to the "__itex1DLayeredLod" texture operation.
// Only available for element types that have an __nv_itex_trait
// specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex1DLayeredLod(T *ptr, cudaTextureObject_t obj, float x, int layer, float level)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex1DLayeredLod")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, layer, level);
}

// tex1DLayeredLod, value-returning form: runs the destination-pointer
// overload on a local T and returns that local.
template <class T>
static __attribute__((device)) T tex1DLayeredLod(cudaTextureObject_t texObject, float x, int layer, float level)
{
  T fetched;
  tex1DLayeredLod(&fetched, texObject, x, layer, level);
  return fetched;
}


// tex2DLayeredLod, destination-pointer form: forwards
// (ptr, obj, x, y, layer, level) to the "__itex2DLayeredLod" texture
// operation. Only available for element types that have an __nv_itex_trait
// specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex2DLayeredLod(T *ptr, cudaTextureObject_t obj, float x, float y, int layer, float level)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex2DLayeredLod")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, layer, level);
}

// tex2DLayeredLod, value-returning form: runs the destination-pointer
// overload on a local T and returns that local.
template <class T>
static __attribute__((device)) T tex2DLayeredLod(cudaTextureObject_t texObject, float x, float y, int layer, float level)
{
  T fetched;
  tex2DLayeredLod(&fetched, texObject, x, y, layer, level);
  return fetched;
}
# 531 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
// texCubemapLod, destination-pointer form: forwards
// (ptr, obj, x, y, z, level) to the "__itexCubemapLod" texture operation.
// Only available for element types that have an __nv_itex_trait
// specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type texCubemapLod(T *ptr, cudaTextureObject_t obj, float x, float y, float z, float level)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itexCubemapLod")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, z, level);
}

// texCubemapLod, value-returning form: runs the destination-pointer overload
// on a local T and returns that local.
template <class T>
static __attribute__((device)) T texCubemapLod(cudaTextureObject_t texObject, float x, float y, float z, float level)
{
  T fetched;
  texCubemapLod(&fetched, texObject, x, y, z, level);
  return fetched;
}


// texCubemapGrad, destination-pointer form: forwards (ptr, obj, x, y, z) plus
// the addresses of the by-value dPdx / dPdy arguments to the
// "__itexCubemapGrad_v2" texture operation. Only available for element types
// that have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type texCubemapGrad(T *ptr, cudaTextureObject_t obj, float x, float y, float z, float4 dPdx, float4 dPdy)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itexCubemapGrad_v2")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, z, &dPdx, &dPdy);
}

// texCubemapGrad, value-returning form: runs the destination-pointer overload
// on a local T and returns that local.
template <class T>
static __attribute__((device)) T texCubemapGrad(cudaTextureObject_t texObject, float x, float y, float z, float4 dPdx, float4 dPdy)
{
  T fetched;
  texCubemapGrad(&fetched, texObject, x, y, z, dPdx, dPdy);
  return fetched;
}

// texCubemapLayeredLod, destination-pointer form: forwards
// (ptr, obj, x, y, z, layer, level) to the "__itexCubemapLayeredLod" texture
// operation. Only available for element types that have an __nv_itex_trait
// specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type texCubemapLayeredLod(T *ptr, cudaTextureObject_t obj, float x, float y, float z, int layer, float level)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itexCubemapLayeredLod")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, z, layer, level);
}

// texCubemapLayeredLod, value-returning form: runs the destination-pointer
// overload on a local T and returns that local.
template <class T>
static __attribute__((device)) T texCubemapLayeredLod(cudaTextureObject_t texObject, float x, float y, float z, int layer, float level)
{
  T fetched;
  texCubemapLayeredLod(&fetched, texObject, x, y, z, layer, level);
  return fetched;
}

// tex1DGrad, destination-pointer form: forwards (ptr, obj, x, dPdx, dPdy) to
// the "__itex1DGrad" texture operation; the scalar gradients are passed by
// value. Only available for element types that have an __nv_itex_trait
// specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex1DGrad(T *ptr, cudaTextureObject_t obj, float x, float dPdx, float dPdy)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex1DGrad")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, dPdx, dPdy);
}

// tex1DGrad, value-returning form: runs the destination-pointer overload on a
// local T and returns that local.
template <class T>
static __attribute__((device)) T tex1DGrad(cudaTextureObject_t texObject, float x, float dPdx, float dPdy)
{
  T fetched;
  tex1DGrad(&fetched, texObject, x, dPdx, dPdy);
  return fetched;
}


// tex2DGrad, destination-pointer form: forwards (ptr, obj, x, y) plus the
// addresses of the by-value float2 dPdx / dPdy arguments to the
// "__itex2DGrad_v2" texture operation. Only available for element types that
// have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex2DGrad(T *ptr, cudaTextureObject_t obj, float x, float y, float2 dPdx, float2 dPdy)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex2DGrad_v2")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, &dPdx, &dPdy);
}

// tex2DGrad, value-returning form: runs the destination-pointer overload on a
// local T and returns that local.
template <class T>
static __attribute__((device)) T tex2DGrad(cudaTextureObject_t texObject, float x, float y, float2 dPdx, float2 dPdy)
{
  T fetched;
  tex2DGrad(&fetched, texObject, x, y, dPdx, dPdy);
  return fetched;
}
# 648 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
// tex3DGrad, destination-pointer form: forwards (ptr, obj, x, y, z) plus the
// addresses of the by-value float4 dPdx / dPdy arguments to the
// "__itex3DGrad_v2" texture operation. Only available for element types that
// have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex3DGrad(T *ptr, cudaTextureObject_t obj, float x, float y, float z, float4 dPdx, float4 dPdy)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex3DGrad_v2")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, z, &dPdx, &dPdy);
}

// tex3DGrad, value-returning form: runs the destination-pointer overload on a
// local T and returns that local.
template <class T>
static __attribute__((device)) T tex3DGrad(cudaTextureObject_t texObject, float x, float y, float z, float4 dPdx, float4 dPdy)
{
  T fetched;
  tex3DGrad(&fetched, texObject, x, y, z, dPdx, dPdy);
  return fetched;
}
# 690 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
// tex1DLayeredGrad, destination-pointer form: forwards
// (ptr, obj, x, layer, dPdx, dPdy) to the "__itex1DLayeredGrad" texture
// operation; the scalar gradients are passed by value. Only available for
// element types that have an __nv_itex_trait specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex1DLayeredGrad(T *ptr, cudaTextureObject_t obj, float x, int layer, float dPdx, float dPdy)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex1DLayeredGrad")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, layer, dPdx, dPdy);
}

// tex1DLayeredGrad, value-returning form: runs the destination-pointer
// overload on a local T and returns that local.
template <class T>
static __attribute__((device)) T tex1DLayeredGrad(cudaTextureObject_t texObject, float x, int layer, float dPdx, float dPdy)
{
  T fetched;
  tex1DLayeredGrad(&fetched, texObject, x, layer, dPdx, dPdy);
  return fetched;
}


// tex2DLayeredGrad, destination-pointer form: forwards
// (ptr, obj, x, y, layer) plus the addresses of the by-value float2
// dPdx / dPdy arguments to the "__itex2DLayeredGrad_v2" texture operation.
// Only available for element types that have an __nv_itex_trait
// specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type tex2DLayeredGrad(T * ptr, cudaTextureObject_t obj, float x, float y, int layer, float2 dPdx, float2 dPdy)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itex2DLayeredGrad_v2")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, layer, &dPdx, &dPdy);
}

// tex2DLayeredGrad, value-returning form: runs the destination-pointer
// overload on a local T and returns that local.
template <class T>
static __attribute__((device)) T tex2DLayeredGrad(cudaTextureObject_t texObject, float x, float y, int layer, float2 dPdx, float2 dPdy)
{
  T fetched;
  tex2DLayeredGrad(&fetched, texObject, x, y, layer, dPdx, dPdy);
  return fetched;
}
# 750 "/usr/local/cuda-11.7/include/texture_indirect_functions.h" 3
// texCubemapLayeredGrad, destination-pointer form: forwards
// (ptr, obj, x, y, z, layer) plus the addresses of the by-value float4
// dPdx / dPdy arguments to the "__itexCubemapLayeredGrad_v2" texture
// operation. Only available for element types that have an __nv_itex_trait
// specialization.
template <typename T>
static __attribute__((device)) typename __nv_itex_trait<T>::type texCubemapLayeredGrad(T *ptr, cudaTextureObject_t obj, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
{
  using __fetch_tag = ::__cuda_tex::__Tag<::__cuda_tex::__tex_op_hash("__itexCubemapLayeredGrad_v2")>;
  ::__cuda_tex::__tex_fetch<__fetch_tag>(ptr, obj, x, y, z, layer, &dPdx, &dPdy);
}

// texCubemapLayeredGrad, value-returning form: runs the destination-pointer
// overload on a local T and returns that local.
template <class T>
static __attribute__((device)) T texCubemapLayeredGrad(cudaTextureObject_t texObject, float x, float y, float z, int layer, float4 dPdx, float4 dPdy)
{
  T fetched;
  texCubemapLayeredGrad(&fetched, texObject, x, y, z, layer, dPdx, dPdy);
  return fetched;
}
# 389 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 398 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
// C-linkage declarations of functions that are callable from device code.
extern "C" {


// Device-side declarations of vprintf, free and malloc (declarations only;
// no definitions are provided here).
__attribute__((device)) int vprintf(const char *, const char *);
__attribute__((device)) void free(void *) __attribute((nothrow));
__attribute__((device)) void *malloc(size_t) __attribute((nothrow)) __attribute__((malloc));


// Device-side assertion entry point (declaration only). Takes the same four
// arguments as __assert_fail below plus a character size.
__attribute__((device)) void __assertfail(const char *__message, const char *__file,
                             unsigned __line, const char *__function,
                             size_t __charSize);


// Four-argument assertion handler: forwards its arguments unchanged to
// __assertfail and supplies sizeof(char) as the character size.
__attribute__((device)) static inline void __assert_fail(const char *__message,
                                            const char *__file, unsigned __line,
                                            const char *__function) {
  __assertfail(__message, __file, __line, __function, sizeof(char));
}


// Device-side declaration of the variadic printf.
__attribute__((device)) int printf(const char *, ...);
}


// Device-side std::free and std::malloc: thin forwarders to the
// global-namespace ::free and ::malloc.
namespace std {
// Releases __ptr via ::free.
__attribute__((device)) static inline void free(void *__ptr) {
  ::free(__ptr);
}
// Returns whatever ::malloc returns for a request of __size bytes.
__attribute__((device)) static inline void *malloc(size_t __size) {
  void *__block = ::malloc(__size);
  return __block;
}
}


// Conversions from __cuda_builtin_threadIdx_t to plain vector types; both
// pack the object's x, y and z members in that order.
__attribute__((device)) inline __cuda_builtin_threadIdx_t::operator dim3() const {
  dim3 __packed(x, y, z);
  return __packed;
}

__attribute__((device)) inline __cuda_builtin_threadIdx_t::operator uint3() const {
  uint3 __packed = {x, y, z};
  return __packed;
}

// Conversions from __cuda_builtin_blockIdx_t to plain vector types; both
// pack the object's x, y and z members in that order.
__attribute__((device)) inline __cuda_builtin_blockIdx_t::operator dim3() const {
  dim3 __packed(x, y, z);
  return __packed;
}

__attribute__((device)) inline __cuda_builtin_blockIdx_t::operator uint3() const {
  uint3 __packed = {x, y, z};
  return __packed;
}

// Conversions from __cuda_builtin_blockDim_t to plain vector types; both
// pack the object's x, y and z members in that order.
__attribute__((device)) inline __cuda_builtin_blockDim_t::operator dim3() const {
  dim3 __packed(x, y, z);
  return __packed;
}

__attribute__((device)) inline __cuda_builtin_blockDim_t::operator uint3() const {
  uint3 __packed = {x, y, z};
  return __packed;
}

// Conversions from __cuda_builtin_gridDim_t to plain vector types; both
// pack the object's x, y and z members in that order.
__attribute__((device)) inline __cuda_builtin_gridDim_t::operator dim3() const {
  dim3 __packed(x, y, z);
  return __packed;
}

__attribute__((device)) inline __cuda_builtin_gridDim_t::operator uint3() const {
  uint3 __packed = {x, y, z};
  return __packed;
}

# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_cmath.h" 1 3
# 16 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_cmath.h" 3
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/limits" 1 3
# 41 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/limits" 3
# 158 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/limits" 3
namespace std __attribute__ ((__visibility__ ("default")))
{


  // Rounding-style constants; used in this file as the type and value of
  // numeric_limits<...>::round_style. Values run from -1 to 3.
  enum float_round_style
  {
    // Rounding style cannot be determined.
    round_indeterminate = -1,
    // Round toward zero (the value every specialization in view reports).
    round_toward_zero = 0,
    // Round to the nearest representable value.
    round_to_nearest = 1,
    // Round toward positive infinity.
    round_toward_infinity = 2,
    // Round toward negative infinity.
    round_toward_neg_infinity = 3
  };


  // Denormalization-support constants; used in this file as the type and
  // value of numeric_limits<...>::has_denorm.
  enum float_denorm_style
  {

    // Cannot be determined whether the type allows denormalized values.
    denorm_indeterminate = -1,

    // The type does not allow denormalized values (the value every
    // specialization in view reports).
    denorm_absent = 0,

    // The type allows denormalized values.
    denorm_present = 1
  };
# 202 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/limits" 3
  // Default values for the static data members of numeric_limits. The primary
  // numeric_limits template derives from this struct, so a type without an
  // explicit specialization reports: every bool flag false, every int count 0,
  // has_denorm == denorm_absent and round_style == round_toward_zero.
  struct __numeric_limits_base
  {


    // False here; the explicit specializations later in this file set it to
    // true.
    static constexpr bool is_specialized = false;


    // Digit counts default to 0.
    static constexpr int digits = 0;


    static constexpr int digits10 = 0;


    static constexpr int max_digits10 = 0;


    // Classification flags default to false.
    static constexpr bool is_signed = false;


    static constexpr bool is_integer = false;


    static constexpr bool is_exact = false;


    static constexpr int radix = 0;


    // Exponent ranges default to 0.
    static constexpr int min_exponent = 0;


    static constexpr int min_exponent10 = 0;


    static constexpr int max_exponent = 0;


    static constexpr int max_exponent10 = 0;


    // No special floating-point values by default.
    static constexpr bool has_infinity = false;


    static constexpr bool has_quiet_NaN = false;


    static constexpr bool has_signaling_NaN = false;


    static constexpr float_denorm_style has_denorm = denorm_absent;


    static constexpr bool has_denorm_loss = false;


    static constexpr bool is_iec559 = false;


    static constexpr bool is_bounded = false;
# 288 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/limits" 3
    static constexpr bool is_modulo = false;


    static constexpr bool traps = false;


    static constexpr bool tinyness_before = false;


    // Default rounding style is round_toward_zero.
    static constexpr float_round_style round_style =
          round_toward_zero;
  };
# 311 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/limits" 3
  // Primary numeric_limits template, used for any type without an explicit
  // specialization. Static data members come from __numeric_limits_base
  // (is_specialized == false); every member function below returns a
  // value-initialized _Tp().
  template<typename _Tp>
    struct numeric_limits : public __numeric_limits_base
    {


      // Returns _Tp().
      static constexpr _Tp
      min() noexcept { return _Tp(); }


      // Returns _Tp().
      static constexpr _Tp
      max() noexcept { return _Tp(); }


      // Returns _Tp().
      static constexpr _Tp
      lowest() noexcept { return _Tp(); }


      // Returns _Tp().
      static constexpr _Tp
      epsilon() noexcept { return _Tp(); }


      // Returns _Tp().
      static constexpr _Tp
      round_error() noexcept { return _Tp(); }


      // Returns _Tp().
      static constexpr _Tp
      infinity() noexcept { return _Tp(); }


      // Returns _Tp().
      static constexpr _Tp
      quiet_NaN() noexcept { return _Tp(); }


      // Returns _Tp().
      static constexpr _Tp
      signaling_NaN() noexcept { return _Tp(); }


      // Returns _Tp().
      static constexpr _Tp
      denorm_min() noexcept { return _Tp(); }
    };


  // cv-qualified types report the limits of the unqualified type: each
  // partial specialization is an empty struct deriving (publicly, the struct
  // default) from numeric_limits<_Tp>.
  template<typename _Tp>
    struct numeric_limits<const _Tp> : numeric_limits<_Tp> { };

  template<typename _Tp>
    struct numeric_limits<volatile _Tp> : numeric_limits<_Tp> { };

  template<typename _Tp>
    struct numeric_limits<const volatile _Tp> : numeric_limits<_Tp> { };
# 383 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/limits" 3
  // numeric_limits<bool>: a bounded, exact, unsigned integer type with a
  // single value digit. Range is [false, true]; every floating-point-only
  // query returns false / 0.
  template<>
    struct numeric_limits<bool>
    {
      static constexpr bool is_specialized = true;

      // Smallest value: false.
      static constexpr bool
      min() noexcept { return false; }

      // Largest value: true.
      static constexpr bool
      max() noexcept { return true; }


      // Same as min().
      static constexpr bool
      lowest() noexcept { return min(); }

      // One radix-2 digit; no complete decimal digit.
      static constexpr int digits = 1;
      static constexpr int digits10 = 0;

      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = false;
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      static constexpr bool
      epsilon() noexcept { return false; }

      static constexpr bool
      round_error() noexcept { return false; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm
       = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      static constexpr bool
      infinity() noexcept { return false; }

      static constexpr bool
      quiet_NaN() noexcept { return false; }

      static constexpr bool
      signaling_NaN() noexcept { return false; }

      static constexpr bool
      denorm_min() noexcept { return false; }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = false;


      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style
       = round_toward_zero;
    };


  // numeric_limits<char>: integer limits computed from sizeof(char) and from
  // whether plain char is signed, tested everywhere as ((char)(-1) < 0).
  template<>
    struct numeric_limits<char>
    {
      static constexpr bool is_specialized = true;

      // If char is signed: -(max) - 1; otherwise 0. The inner expression is
      // the same one max() uses.
      static constexpr char
      min() noexcept { return (((char)(-1) < 0) ? -(((char)(-1) < 0) ? (((((char)1 << ((sizeof(char) * 8 - ((char)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(char)0) - 1 : (char)0); }

      // If char is signed: all value bits set, built as
      // ((1 << (digits - 1)) - 1) * 2 + 1 so the shift never reaches the sign
      // bit; otherwise ~0.
      static constexpr char
      max() noexcept { return (((char)(-1) < 0) ? (((((char)1 << ((sizeof(char) * 8 - ((char)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(char)0); }


      // Same as min().
      static constexpr char
      lowest() noexcept { return min(); }


      // Value bits: total bits minus one if the type is signed.
      static constexpr int digits = (sizeof(char) * 8 - ((char)(-1) < 0));
      // digits scaled by 643/2136, a rational approximation of log10(2).
      static constexpr int digits10 = ((sizeof(char) * 8 - ((char)(-1) < 0)) * 643L / 2136);

      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = ((char)(-1) < 0);
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      static constexpr char
      epsilon() noexcept { return 0; }

      static constexpr char
      round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm
       = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // The floating-point-only queries below all return a zero char.
      static constexpr
      char infinity() noexcept { return char(); }

      static constexpr char
      quiet_NaN() noexcept { return char(); }

      static constexpr char
      signaling_NaN() noexcept { return char(); }

      static constexpr char
      denorm_min() noexcept { return static_cast<char>(0); }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      // Reported as modulo exactly when char is unsigned.
      static constexpr bool is_modulo = !is_signed;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style
       = round_toward_zero;
    };


  // numeric_limits<signed char>: signed integer limits with the range
  // written as literals, [-128, 127].
  template<>
    struct numeric_limits<signed char>
    {
      static constexpr bool is_specialized = true;

      // -128, written as -127 - 1.
      static constexpr signed char
      min() noexcept { return -127 - 1; }

      static constexpr signed char
      max() noexcept { return 127; }


      // Same as min().
      static constexpr signed char
      lowest() noexcept { return min(); }


      // Value bits: total bits minus one for the sign.
      static constexpr int digits = (sizeof(signed char) * 8 - ((signed char)(-1) < 0));
      // digits scaled by 643/2136, a rational approximation of log10(2).
      static constexpr int digits10
       = ((sizeof(signed char) * 8 - ((signed char)(-1) < 0)) * 643L / 2136);

      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = true;
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      static constexpr signed char
      epsilon() noexcept { return 0; }

      static constexpr signed char
      round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm
       = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // The floating-point-only queries below all return 0.
      static constexpr signed char
      infinity() noexcept { return static_cast<signed char>(0); }

      static constexpr signed char
      quiet_NaN() noexcept { return static_cast<signed char>(0); }

      static constexpr signed char
      signaling_NaN() noexcept
      { return static_cast<signed char>(0); }

      static constexpr signed char
      denorm_min() noexcept
      { return static_cast<signed char>(0); }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = false;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style
       = round_toward_zero;
    };


  // numeric_limits<unsigned char>: unsigned integer limits with the range
  // written as literals, [0, 255]; reported as modulo.
  template<>
    struct numeric_limits<unsigned char>
    {
      static constexpr bool is_specialized = true;

      static constexpr unsigned char
      min() noexcept { return 0; }

      // 255, written as 127 * 2U + 1.
      static constexpr unsigned char
      max() noexcept { return 127 * 2U + 1; }


      // Same as min().
      static constexpr unsigned char
      lowest() noexcept { return min(); }


      // Value bits: the signedness test is false for an unsigned type, so
      // nothing is subtracted from the total bit count.
      static constexpr int digits
       = (sizeof(unsigned char) * 8 - ((unsigned char)(-1) < 0));
      // digits scaled by 643/2136, a rational approximation of log10(2).
      static constexpr int digits10
       = ((sizeof(unsigned char) * 8 - ((unsigned char)(-1) < 0)) * 643L / 2136);

      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = false;
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      static constexpr unsigned char
      epsilon() noexcept { return 0; }

      static constexpr unsigned char
      round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm
       = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // The floating-point-only queries below all return 0.
      static constexpr unsigned char
      infinity() noexcept
      { return static_cast<unsigned char>(0); }

      static constexpr unsigned char
      quiet_NaN() noexcept
      { return static_cast<unsigned char>(0); }

      static constexpr unsigned char
      signaling_NaN() noexcept
      { return static_cast<unsigned char>(0); }

      static constexpr unsigned char
      denorm_min() noexcept
      { return static_cast<unsigned char>(0); }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = true;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style
       = round_toward_zero;
    };


  // numeric_limits<wchar_t>: integer limits computed from sizeof(wchar_t) and
  // from whether wchar_t is signed, tested as ((wchar_t)(-1) < 0).
  template<>
    struct numeric_limits<wchar_t>
    {
      static constexpr bool is_specialized = true;

      // If wchar_t is signed: -(max) - 1; otherwise 0.
      static constexpr wchar_t
      min() noexcept { return (((wchar_t)(-1) < 0) ? -(((wchar_t)(-1) < 0) ? (((((wchar_t)1 << ((sizeof(wchar_t) * 8 - ((wchar_t)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(wchar_t)0) - 1 : (wchar_t)0); }

      // If wchar_t is signed: all value bits set, built as
      // ((1 << (digits - 1)) - 1) * 2 + 1; otherwise ~0.
      static constexpr wchar_t
      max() noexcept { return (((wchar_t)(-1) < 0) ? (((((wchar_t)1 << ((sizeof(wchar_t) * 8 - ((wchar_t)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(wchar_t)0); }


      // Same as min().
      static constexpr wchar_t
      lowest() noexcept { return min(); }


      // Value bits: total bits minus one if the type is signed.
      static constexpr int digits = (sizeof(wchar_t) * 8 - ((wchar_t)(-1) < 0));
      // digits scaled by 643/2136, a rational approximation of log10(2).
      static constexpr int digits10
       = ((sizeof(wchar_t) * 8 - ((wchar_t)(-1) < 0)) * 643L / 2136);

      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = ((wchar_t)(-1) < 0);
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      static constexpr wchar_t
      epsilon() noexcept { return 0; }

      static constexpr wchar_t
      round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm
       = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // The floating-point-only queries below all return wchar_t().
      static constexpr wchar_t
      infinity() noexcept { return wchar_t(); }

      static constexpr wchar_t
      quiet_NaN() noexcept { return wchar_t(); }

      static constexpr wchar_t
      signaling_NaN() noexcept { return wchar_t(); }

      static constexpr wchar_t
      denorm_min() noexcept { return wchar_t(); }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      // Reported as modulo exactly when wchar_t is unsigned.
      static constexpr bool is_modulo = !is_signed;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style
       = round_toward_zero;
    };
# 796 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/limits" 3
  // numeric_limits<char16_t>: integer limits computed from sizeof(char16_t)
  // and from the generic signedness test ((char16_t)(-1) < 0).
  template<>
    struct numeric_limits<char16_t>
    {
      static constexpr bool is_specialized = true;

      // If the signedness test holds: -(max) - 1; otherwise 0.
      static constexpr char16_t
      min() noexcept { return (((char16_t)(-1) < 0) ? -(((char16_t)(-1) < 0) ? (((((char16_t)1 << ((sizeof(char16_t) * 8 - ((char16_t)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(char16_t)0) - 1 : (char16_t)0); }

      // If the signedness test holds: all value bits set, built as
      // ((1 << (digits - 1)) - 1) * 2 + 1; otherwise ~0.
      static constexpr char16_t
      max() noexcept { return (((char16_t)(-1) < 0) ? (((((char16_t)1 << ((sizeof(char16_t) * 8 - ((char16_t)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(char16_t)0); }

      // Same as min().
      static constexpr char16_t
      lowest() noexcept { return min(); }

      // Value bits, and that count scaled by 643/2136 (a rational
      // approximation of log10(2)).
      static constexpr int digits = (sizeof(char16_t) * 8 - ((char16_t)(-1) < 0));
      static constexpr int digits10 = ((sizeof(char16_t) * 8 - ((char16_t)(-1) < 0)) * 643L / 2136);
      static constexpr int max_digits10 = 0;
      static constexpr bool is_signed = ((char16_t)(-1) < 0);
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      static constexpr char16_t
      epsilon() noexcept { return 0; }

      static constexpr char16_t
      round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // The floating-point-only queries below all return char16_t().
      static constexpr char16_t
      infinity() noexcept { return char16_t(); }

      static constexpr char16_t
      quiet_NaN() noexcept { return char16_t(); }

      static constexpr char16_t
      signaling_NaN() noexcept { return char16_t(); }

      static constexpr char16_t
      denorm_min() noexcept { return char16_t(); }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      // Reported as modulo exactly when the type is unsigned.
      static constexpr bool is_modulo = !is_signed;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_toward_zero;
    };


  // numeric_limits<char32_t>: integer limits computed from sizeof(char32_t)
  // and from the generic signedness test ((char32_t)(-1) < 0).
  template<>
    struct numeric_limits<char32_t>
    {
      static constexpr bool is_specialized = true;

      // If the signedness test holds: -(max) - 1; otherwise 0.
      static constexpr char32_t
      min() noexcept { return (((char32_t)(-1) < 0) ? -(((char32_t)(-1) < 0) ? (((((char32_t)1 << ((sizeof(char32_t) * 8 - ((char32_t)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(char32_t)0) - 1 : (char32_t)0); }

      // If the signedness test holds: all value bits set, built as
      // ((1 << (digits - 1)) - 1) * 2 + 1; otherwise ~0.
      static constexpr char32_t
      max() noexcept { return (((char32_t)(-1) < 0) ? (((((char32_t)1 << ((sizeof(char32_t) * 8 - ((char32_t)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(char32_t)0); }

      // Same as min().
      static constexpr char32_t
      lowest() noexcept { return min(); }

      // Value bits, and that count scaled by 643/2136 (a rational
      // approximation of log10(2)).
      static constexpr int digits = (sizeof(char32_t) * 8 - ((char32_t)(-1) < 0));
      static constexpr int digits10 = ((sizeof(char32_t) * 8 - ((char32_t)(-1) < 0)) * 643L / 2136);
      static constexpr int max_digits10 = 0;
      static constexpr bool is_signed = ((char32_t)(-1) < 0);
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      static constexpr char32_t
      epsilon() noexcept { return 0; }

      static constexpr char32_t
      round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // The floating-point-only queries below all return char32_t().
      static constexpr char32_t
      infinity() noexcept { return char32_t(); }

      static constexpr char32_t
      quiet_NaN() noexcept { return char32_t(); }

      static constexpr char32_t
      signaling_NaN() noexcept { return char32_t(); }

      static constexpr char32_t
      denorm_min() noexcept { return char32_t(); }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      // Reported as modulo exactly when the type is unsigned.
      static constexpr bool is_modulo = !is_signed;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_toward_zero;
    };


  // numeric_limits specialization for `short`.
  // Describes a signed integer with range [-32768, 32767].  Members that
  // only apply to floating-point types are kept for interface completeness
  // and report zero / false.
  template<>
    struct numeric_limits<short>
    {
      static constexpr bool is_specialized = true;

      // Representable range; lowest() coincides with min() for integers.
      static constexpr short min() noexcept { return static_cast<short>(-32767 - 1); }
      static constexpr short max() noexcept { return static_cast<short>(32767); }
      static constexpr short lowest() noexcept { return min(); }

      // One bit is the sign bit; 643/2136 approximates log10(2).
      static constexpr int digits = static_cast<int>(sizeof(short) * 8) - 1;
      static constexpr int digits10 = static_cast<int>(digits * 643L / 2136);
      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = true;
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      // Floating-point-only quantities: meaningless for an integer type.
      static constexpr short epsilon() noexcept { return 0; }
      static constexpr short round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // No special values exist, so each accessor yields zero.
      static constexpr short infinity() noexcept { return 0; }
      static constexpr short quiet_NaN() noexcept { return 0; }
      static constexpr short signaling_NaN() noexcept { return 0; }
      static constexpr short denorm_min() noexcept { return 0; }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = false;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_toward_zero;
    };


  // numeric_limits specialization for `unsigned short`.
  // Describes an unsigned integer with range [0, 65535] whose arithmetic
  // wraps around (is_modulo).  Floating-point-only members are kept for
  // interface completeness and report zero / false.
  template<>
    struct numeric_limits<unsigned short>
    {
      static constexpr bool is_specialized = true;

      // Representable range; lowest() coincides with min() for integers.
      static constexpr unsigned short min() noexcept { return 0; }
      static constexpr unsigned short max() noexcept { return static_cast<unsigned short>(32767 * 2U + 1); }
      static constexpr unsigned short lowest() noexcept { return min(); }

      // No sign bit to subtract; 643/2136 approximates log10(2).
      static constexpr int digits = static_cast<int>(sizeof(unsigned short) * 8);
      static constexpr int digits10 = static_cast<int>(digits * 643L / 2136);
      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = false;
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      // Floating-point-only quantities: meaningless for an integer type.
      static constexpr unsigned short epsilon() noexcept { return 0; }
      static constexpr unsigned short round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // No special values exist, so each accessor yields zero.
      static constexpr unsigned short infinity() noexcept { return 0; }
      static constexpr unsigned short quiet_NaN() noexcept { return 0; }
      static constexpr unsigned short signaling_NaN() noexcept { return 0; }
      static constexpr unsigned short denorm_min() noexcept { return 0; }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = true;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_toward_zero;
    };


  // numeric_limits specialization for `int`.
  // Describes a signed integer with range [-2147483648, 2147483647].
  // Floating-point-only members are kept for interface completeness and
  // report zero / false.
  template<>
    struct numeric_limits<int>
    {
      static constexpr bool is_specialized = true;

      // Representable range.  min() is spelled as "-MAX - 1" because the
      // positive literal 2147483648 does not fit in an int.
      static constexpr int min() noexcept { return (-2147483647) - 1; }
      static constexpr int max() noexcept { return +2147483647; }
      static constexpr int lowest() noexcept { return min(); }

      // One bit is the sign bit; 643/2136 approximates log10(2).
      static constexpr int digits = static_cast<int>(sizeof(int) * 8) - 1;
      static constexpr int digits10 = static_cast<int>(digits * 643L / 2136);
      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = true;
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      // Floating-point-only quantities: meaningless for an integer type.
      static constexpr int epsilon() noexcept { return 0; }
      static constexpr int round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // No special values exist, so each accessor yields zero.
      static constexpr int infinity() noexcept { return 0; }
      static constexpr int quiet_NaN() noexcept { return 0; }
      static constexpr int signaling_NaN() noexcept { return 0; }
      static constexpr int denorm_min() noexcept { return 0; }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = false;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_toward_zero;
    };


  // numeric_limits specialization for `unsigned int`.
  // Describes an unsigned integer with range [0, 4294967295] whose
  // arithmetic wraps around (is_modulo).  Floating-point-only members are
  // kept for interface completeness and report zero / false.
  template<>
    struct numeric_limits<unsigned int>
    {
      static constexpr bool is_specialized = true;

      // Representable range; max() is derived from the signed maximum.
      static constexpr unsigned int min() noexcept { return 0; }
      static constexpr unsigned int max() noexcept { return (2147483647 * 2U) + 1; }
      static constexpr unsigned int lowest() noexcept { return min(); }

      // No sign bit to subtract; 643/2136 approximates log10(2).
      static constexpr int digits = static_cast<int>(sizeof(unsigned int) * 8);
      static constexpr int digits10 = static_cast<int>(digits * 643L / 2136);
      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = false;
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      // Floating-point-only quantities: meaningless for an integer type.
      static constexpr unsigned int epsilon() noexcept { return 0; }
      static constexpr unsigned int round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // No special values exist, so each accessor yields zero.
      static constexpr unsigned int infinity() noexcept { return 0; }
      static constexpr unsigned int quiet_NaN() noexcept { return 0; }
      static constexpr unsigned int signaling_NaN() noexcept { return 0; }
      static constexpr unsigned int denorm_min() noexcept { return 0; }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = true;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_toward_zero;
    };


  // numeric_limits specialization for `long`.
  // Describes a signed integer with range
  // [-9223372036854775808, 9223372036854775807].  Floating-point-only
  // members are kept for interface completeness and report zero / false.
  template<>
    struct numeric_limits<long>
    {
      static constexpr bool is_specialized = true;

      // Representable range.  min() is spelled as "-MAX - 1" because the
      // magnitude of the minimum has no positive `long` literal.
      static constexpr long min() noexcept { return (-9223372036854775807L) - 1; }
      static constexpr long max() noexcept { return +9223372036854775807L; }
      static constexpr long lowest() noexcept { return min(); }

      // One bit is the sign bit; 643/2136 approximates log10(2).
      static constexpr int digits = static_cast<int>(sizeof(long) * 8) - 1;
      static constexpr int digits10 = static_cast<int>(digits * 643L / 2136);
      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = true;
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      // Floating-point-only quantities: meaningless for an integer type.
      static constexpr long epsilon() noexcept { return 0; }
      static constexpr long round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // No special values exist, so each accessor yields zero.
      static constexpr long infinity() noexcept { return 0; }
      static constexpr long quiet_NaN() noexcept { return 0; }
      static constexpr long signaling_NaN() noexcept { return 0; }
      static constexpr long denorm_min() noexcept { return 0; }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = false;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_toward_zero;
    };


  // numeric_limits specialization for `unsigned long`.
  // Describes an unsigned integer with range [0, 18446744073709551615]
  // whose arithmetic wraps around (is_modulo).  Floating-point-only members
  // are kept for interface completeness and report zero / false.
  template<>
    struct numeric_limits<unsigned long>
    {
      static constexpr bool is_specialized = true;

      // Representable range; max() is derived from the signed maximum.
      static constexpr unsigned long min() noexcept { return 0; }
      static constexpr unsigned long max() noexcept { return (9223372036854775807L * 2UL) + 1; }
      static constexpr unsigned long lowest() noexcept { return min(); }

      // No sign bit to subtract; 643/2136 approximates log10(2).
      static constexpr int digits = static_cast<int>(sizeof(unsigned long) * 8);
      static constexpr int digits10 = static_cast<int>(digits * 643L / 2136);
      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = false;
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      // Floating-point-only quantities: meaningless for an integer type.
      static constexpr unsigned long epsilon() noexcept { return 0; }
      static constexpr unsigned long round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // No special values exist, so each accessor yields zero.
      static constexpr unsigned long infinity() noexcept { return 0; }
      static constexpr unsigned long quiet_NaN() noexcept { return 0; }
      static constexpr unsigned long signaling_NaN() noexcept { return 0; }
      static constexpr unsigned long denorm_min() noexcept { return 0; }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = true;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_toward_zero;
    };


  // numeric_limits specialization for `long long`.
  // Describes a signed integer with range
  // [-9223372036854775808, 9223372036854775807].  Floating-point-only
  // members are kept for interface completeness and report zero / false.
  template<>
    struct numeric_limits<long long>
    {
      static constexpr bool is_specialized = true;

      // Representable range.  min() is spelled as "-MAX - 1" because the
      // magnitude of the minimum has no positive `long long` literal.
      static constexpr long long min() noexcept { return (-9223372036854775807LL) - 1; }
      static constexpr long long max() noexcept { return +9223372036854775807LL; }
      static constexpr long long lowest() noexcept { return min(); }

      // One bit is the sign bit; 643/2136 approximates log10(2).
      static constexpr int digits = static_cast<int>(sizeof(long long) * 8) - 1;
      static constexpr int digits10 = static_cast<int>(digits * 643L / 2136);
      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = true;
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      // Floating-point-only quantities: meaningless for an integer type.
      static constexpr long long epsilon() noexcept { return 0; }
      static constexpr long long round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // No special values exist, so each accessor yields zero.
      static constexpr long long infinity() noexcept { return 0; }
      static constexpr long long quiet_NaN() noexcept { return 0; }
      static constexpr long long signaling_NaN() noexcept { return 0; }
      static constexpr long long denorm_min() noexcept { return 0; }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = false;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_toward_zero;
    };


  // numeric_limits specialization for `unsigned long long`.
  // Describes an unsigned integer with range [0, 18446744073709551615]
  // whose arithmetic wraps around (is_modulo).  Floating-point-only members
  // are kept for interface completeness and report zero / false.
  template<>
    struct numeric_limits<unsigned long long>
    {
      static constexpr bool is_specialized = true;

      // Representable range; max() is derived from the signed maximum.
      static constexpr unsigned long long min() noexcept { return 0; }
      static constexpr unsigned long long max() noexcept { return (9223372036854775807LL * 2ULL) + 1; }
      static constexpr unsigned long long lowest() noexcept { return min(); }

      // No sign bit to subtract; 643/2136 approximates log10(2).
      static constexpr int digits = static_cast<int>(sizeof(unsigned long long) * 8);
      static constexpr int digits10 = static_cast<int>(digits * 643L / 2136);
      static constexpr int max_digits10 = 0;

      static constexpr bool is_signed = false;
      static constexpr bool is_integer = true;
      static constexpr bool is_exact = true;
      static constexpr int radix = 2;

      // Floating-point-only quantities: meaningless for an integer type.
      static constexpr unsigned long long epsilon() noexcept { return 0; }
      static constexpr unsigned long long round_error() noexcept { return 0; }

      static constexpr int min_exponent = 0;
      static constexpr int min_exponent10 = 0;
      static constexpr int max_exponent = 0;
      static constexpr int max_exponent10 = 0;

      static constexpr bool has_infinity = false;
      static constexpr bool has_quiet_NaN = false;
      static constexpr bool has_signaling_NaN = false;
      static constexpr float_denorm_style has_denorm = denorm_absent;
      static constexpr bool has_denorm_loss = false;

      // No special values exist, so each accessor yields zero.
      static constexpr unsigned long long infinity() noexcept { return 0; }
      static constexpr unsigned long long quiet_NaN() noexcept { return 0; }
      static constexpr unsigned long long signaling_NaN() noexcept { return 0; }
      static constexpr unsigned long long denorm_min() noexcept { return 0; }

      static constexpr bool is_iec559 = false;
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = true;

      static constexpr bool traps = true;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_toward_zero;
    };
# 1658 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/limits" 3
  __extension__ template<> struct numeric_limits<__int128> { static constexpr bool is_specialized = true; static constexpr __int128 min() noexcept { return (((__int128)(-1) < 0) ? -(((__int128)(-1) < 0) ? (((((__int128)1 << ((128 - ((__int128)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(__int128)0) - 1 : (__int128)0); } static constexpr __int128 max() noexcept { return (((__int128)(-1) < 0) ? (((((__int128)1 << ((128 - ((__int128)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(__int128)0); } static constexpr int digits = 128 - 1; static constexpr int digits10 = (128 - 1) * 643L / 2136; static constexpr bool is_signed = true; static constexpr bool is_integer = true; static constexpr bool is_exact = true; static constexpr int radix = 2; static constexpr __int128 epsilon() noexcept { return 0; } static constexpr __int128 round_error() noexcept { return 0; } static constexpr __int128 lowest() noexcept { return min(); } static constexpr int max_digits10 = 0; static constexpr int min_exponent = 0; static constexpr int min_exponent10 = 0; static constexpr int max_exponent = 0; static constexpr int max_exponent10 = 0; static constexpr bool has_infinity = false; static constexpr bool has_quiet_NaN = false; static constexpr bool has_signaling_NaN = false; static constexpr float_denorm_style has_denorm = denorm_absent; static constexpr bool has_denorm_loss = false; static constexpr __int128 infinity() noexcept { return static_cast<__int128>(0); } static constexpr __int128 quiet_NaN() noexcept { return static_cast<__int128>(0); } static constexpr __int128 signaling_NaN() noexcept { return static_cast<__int128>(0); } static constexpr __int128 denorm_min() noexcept { return static_cast<__int128>(0); } static constexpr bool is_iec559 = false; static constexpr bool is_bounded = true; static constexpr bool is_modulo = false; static constexpr bool traps = true; static constexpr bool tinyness_before = false; static constexpr float_round_style round_style = round_toward_zero; }; __extension__ template<> 
struct numeric_limits<unsigned __int128> { static constexpr bool is_specialized = true; static constexpr unsigned __int128 min() noexcept { return 0; } static constexpr unsigned __int128 max() noexcept { return (((unsigned __int128)(-1) < 0) ? (((((unsigned __int128)1 << ((128 - ((unsigned __int128)(-1) < 0)) - 1)) - 1) << 1) + 1) : ~(unsigned __int128)0); } static constexpr unsigned __int128 lowest() noexcept { return min(); } static constexpr int max_digits10 = 0; static constexpr int digits = 128; static constexpr int digits10 = 128 * 643L / 2136; static constexpr bool is_signed = false; static constexpr bool is_integer = true; static constexpr bool is_exact = true; static constexpr int radix = 2; static constexpr unsigned __int128 epsilon() noexcept { return 0; } static constexpr unsigned __int128 round_error() noexcept { return 0; } static constexpr int min_exponent = 0; static constexpr int min_exponent10 = 0; static constexpr int max_exponent = 0; static constexpr int max_exponent10 = 0; static constexpr bool has_infinity = false; static constexpr bool has_quiet_NaN = false; static constexpr bool has_signaling_NaN = false; static constexpr float_denorm_style has_denorm = denorm_absent; static constexpr bool has_denorm_loss = false; static constexpr unsigned __int128 infinity() noexcept { return static_cast<unsigned __int128>(0); } static constexpr unsigned __int128 quiet_NaN() noexcept { return static_cast<unsigned __int128>(0); } static constexpr unsigned __int128 signaling_NaN() noexcept { return static_cast<unsigned __int128>(0); } static constexpr unsigned __int128 denorm_min() noexcept { return static_cast<unsigned __int128>(0); } static constexpr bool is_iec559 = false; static constexpr bool is_bounded = true; static constexpr bool is_modulo = true; static constexpr bool traps = true; static constexpr bool tinyness_before = false; static constexpr float_round_style round_style = round_toward_zero; };
# 1669 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/limits" 3
  // numeric_limits specialization for `float`.
  // Describes a radix-2 floating-point type with a 24-bit significand that
  // supports infinities, quiet/signaling NaNs and subnormal values, and
  // rounds to nearest.
  template<>
    struct numeric_limits<float>
    {
      static constexpr bool is_specialized = true;

      // min() is the smallest positive *normal* value; lowest() is the most
      // negative finite value.
      static constexpr float min() noexcept { return 1.17549435e-38F; }
      static constexpr float max() noexcept { return 3.40282347e+38F; }
      static constexpr float lowest() noexcept { return -3.40282347e+38F; }

      // Significand width and decimal precision.  max_digits10 is the
      // number of decimal digits needed for a lossless round trip;
      // 643/2136 approximates log10(2).
      static constexpr int digits = 24;
      static constexpr int digits10 = 6;
      static constexpr int max_digits10 = static_cast<int>(2 + digits * 643L / 2136);

      static constexpr bool is_signed = true;
      static constexpr bool is_integer = false;
      static constexpr bool is_exact = false;
      static constexpr int radix = 2;

      // Gap between 1 and the next representable value, and the maximum
      // rounding error in units of the last place.
      static constexpr float epsilon() noexcept { return 1.19209290e-7F; }
      static constexpr float round_error() noexcept { return 0.5F; }

      static constexpr int min_exponent = -125;
      static constexpr int min_exponent10 = -37;
      static constexpr int max_exponent = 128;
      static constexpr int max_exponent10 = 38;

      static constexpr bool has_infinity = true;
      static constexpr bool has_quiet_NaN = true;
      static constexpr bool has_signaling_NaN = has_quiet_NaN;
      static constexpr float_denorm_style has_denorm = denorm_present;
      static constexpr bool has_denorm_loss = false;

      // Special values are produced by compiler builtins.
      static constexpr float infinity() noexcept { return __builtin_huge_valf(); }
      static constexpr float quiet_NaN() noexcept { return __builtin_nanf(""); }
      static constexpr float signaling_NaN() noexcept { return __builtin_nansf(""); }
      static constexpr float denorm_min() noexcept { return 1.40129846e-45F; }

      // Reported as IEC 559 only when infinities, quiet NaNs and
      // subnormals are all available.
      static constexpr bool is_iec559
        = has_infinity && has_quiet_NaN && (has_denorm == denorm_present);
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = false;

      static constexpr bool traps = false;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_to_nearest;
    };


  // numeric_limits specialization for `double`.
  // Describes a radix-2 floating-point type with a 53-bit significand that
  // supports infinities, quiet/signaling NaNs and subnormal values, and
  // rounds to nearest.
  template<>
    struct numeric_limits<double>
    {
      static constexpr bool is_specialized = true;

      // min() is the smallest positive *normal* value; lowest() is the most
      // negative finite value.
      static constexpr double min() noexcept { return 2.2250738585072014e-308; }
      static constexpr double max() noexcept { return 1.7976931348623157e+308; }
      static constexpr double lowest() noexcept { return -1.7976931348623157e+308; }

      // Significand width and decimal precision.  max_digits10 is the
      // number of decimal digits needed for a lossless round trip;
      // 643/2136 approximates log10(2).
      static constexpr int digits = 53;
      static constexpr int digits10 = 15;
      static constexpr int max_digits10 = static_cast<int>(2 + digits * 643L / 2136);

      static constexpr bool is_signed = true;
      static constexpr bool is_integer = false;
      static constexpr bool is_exact = false;
      static constexpr int radix = 2;

      // Gap between 1 and the next representable value, and the maximum
      // rounding error in units of the last place.
      static constexpr double epsilon() noexcept { return 2.2204460492503131e-16; }
      static constexpr double round_error() noexcept { return 0.5; }

      static constexpr int min_exponent = -1021;
      static constexpr int min_exponent10 = -307;
      static constexpr int max_exponent = 1024;
      static constexpr int max_exponent10 = 308;

      static constexpr bool has_infinity = true;
      static constexpr bool has_quiet_NaN = true;
      static constexpr bool has_signaling_NaN = has_quiet_NaN;
      static constexpr float_denorm_style has_denorm = denorm_present;
      static constexpr bool has_denorm_loss = false;

      // Special values are produced by compiler builtins.
      static constexpr double infinity() noexcept { return __builtin_huge_val(); }
      static constexpr double quiet_NaN() noexcept { return __builtin_nan(""); }
      static constexpr double signaling_NaN() noexcept { return __builtin_nans(""); }
      static constexpr double denorm_min() noexcept { return 4.9406564584124654e-324; }

      // Reported as IEC 559 only when infinities, quiet NaNs and
      // subnormals are all available.
      static constexpr bool is_iec559
        = has_infinity && has_quiet_NaN && (has_denorm == denorm_present);
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = false;

      static constexpr bool traps = false;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_to_nearest;
    };


  // numeric_limits specialization for `long double`.
  // In this translation unit the reported characteristics (53-bit
  // significand, exponent range, extreme values) are numerically the same
  // as those of `double`; only the literal suffixes and builtins differ.
  template<>
    struct numeric_limits<long double>
    {
      static constexpr bool is_specialized = true;

      // min() is the smallest positive *normal* value; lowest() is the most
      // negative finite value.
      static constexpr long double min() noexcept { return 2.2250738585072014e-308L; }
      static constexpr long double max() noexcept { return 1.7976931348623157e+308L; }
      static constexpr long double lowest() noexcept { return -1.7976931348623157e+308L; }

      // Significand width and decimal precision.  max_digits10 is the
      // number of decimal digits needed for a lossless round trip;
      // 643/2136 approximates log10(2).
      static constexpr int digits = 53;
      static constexpr int digits10 = 15;
      static constexpr int max_digits10 = static_cast<int>(2 + digits * 643L / 2136);

      static constexpr bool is_signed = true;
      static constexpr bool is_integer = false;
      static constexpr bool is_exact = false;
      static constexpr int radix = 2;

      // Gap between 1 and the next representable value, and the maximum
      // rounding error in units of the last place.
      static constexpr long double epsilon() noexcept { return 2.2204460492503131e-16L; }
      static constexpr long double round_error() noexcept { return 0.5L; }

      static constexpr int min_exponent = -1021;
      static constexpr int min_exponent10 = -307;
      static constexpr int max_exponent = 1024;
      static constexpr int max_exponent10 = 308;

      static constexpr bool has_infinity = true;
      static constexpr bool has_quiet_NaN = true;
      static constexpr bool has_signaling_NaN = has_quiet_NaN;
      static constexpr float_denorm_style has_denorm = denorm_present;
      static constexpr bool has_denorm_loss = false;

      // Special values are produced by compiler builtins.
      static constexpr long double infinity() noexcept { return __builtin_huge_vall(); }
      static constexpr long double quiet_NaN() noexcept { return __builtin_nanl(""); }
      static constexpr long double signaling_NaN() noexcept { return __builtin_nansl(""); }
      static constexpr long double denorm_min() noexcept { return 4.9406564584124654e-324L; }

      // Reported as IEC 559 only when infinities, quiet NaNs and
      // subnormals are all available.
      static constexpr bool is_iec559
        = has_infinity && has_quiet_NaN && (has_denorm == denorm_present);
      static constexpr bool is_bounded = true;
      static constexpr bool is_modulo = false;

      static constexpr bool traps = false;
      static constexpr bool tinyness_before = false;
      static constexpr float_round_style round_style = round_to_nearest;
    };


}
# 17 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_cmath.h" 2 3
# 41 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_cmath.h" 3
// Device overloads of abs() for long long, long, float and double.
// Each overload forwards to the global C routine that matches its
// argument type (llabs / labs / fabsf / fabs).
static __inline__ __attribute__((always_inline)) __attribute__((device))
long long abs(long long __n)
{
  return ::llabs(__n);
}

static __inline__ __attribute__((always_inline)) __attribute__((device))
long abs(long __n)
{
  return ::labs(__n);
}

static __inline__ __attribute__((always_inline)) __attribute__((device))
float abs(float __x)
{
  return ::fabsf(__x);
}

static __inline__ __attribute__((always_inline)) __attribute__((device))
double abs(double __x)
{
  return ::fabs(__x);
}
// Device overload of acos() for float; forwards to ::acosf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float acos(float __x)
{
  return ::acosf(__x);
}
// Device overload of asin() for float; forwards to ::asinf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float asin(float __x)
{
  return ::asinf(__x);
}
// Device overload of atan() for float; forwards to ::atanf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float atan(float __x)
{
  return ::atanf(__x);
}
// Device overload of atan2() for float; forwards both arguments, in the
// same order, to ::atan2f.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float atan2(float __x, float __y)
{
  return ::atan2f(__x, __y);
}
// Device overload of ceil() for float; forwards to ::ceilf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float ceil(float __x)
{
  return ::ceilf(__x);
}
// Device overload of cos() for float; forwards to ::cosf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float cos(float __x)
{
  return ::cosf(__x);
}
// Device overload of cosh() for float; forwards to ::coshf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float cosh(float __x)
{
  return ::coshf(__x);
}
// Device overload of exp() for float; forwards to ::expf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float exp(float __x)
{
  return ::expf(__x);
}
// Device overload of fabs() for float; forwards to ::fabsf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float fabs(float __x)
{
  return ::fabsf(__x);
}
// Device overload of floor() for float; forwards to ::floorf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float floor(float __x)
{
  return ::floorf(__x);
}
// Device overload of fmod() for float; forwards both arguments, in the
// same order, to ::fmodf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float fmod(float __x, float __y)
{
  return ::fmodf(__x, __y);
}
// Device overloads of fpclassify() for float and double.
// The five integer constants are the category codes handed to
// __builtin_fpclassify in its fixed argument order:
//   NaN -> 0, infinite -> 1, normal -> 4, subnormal -> 3, zero -> 2.
// NOTE(review): these look like the already-expanded FP_* macros; confirm
// against the math header in use.
static __inline__ __attribute__((always_inline)) __attribute__((device))
int fpclassify(float __x)
{
  return __builtin_fpclassify(0, 1, 4, 3, 2, __x);
}

static __inline__ __attribute__((always_inline)) __attribute__((device))
int fpclassify(double __x)
{
  return __builtin_fpclassify(0, 1, 4, 3, 2, __x);
}
// Device overload of frexp() for float.  Forwards to ::frexpf, which
// returns the fraction and stores the binary exponent through __exp.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float frexp(float __arg, int *__exp)
{
  return ::frexpf(__arg, __exp);
}
# 101 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_cmath.h" 3
// Device overloads of isinf() for float and double.  Each forwards to the
// corresponding helper (__isinff / __isinf) and converts its result to bool.
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isinf(float __x)
{
  return ::__isinff(__x);
}

static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isinf(double __x)
{
  return ::__isinf(__x);
}
// isfinite for float: true when ::__finitef reports a non-zero result.
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isfinite(float __x) {
  return ::__finitef(__x) != 0;
}

// isfinite for double: true when ::__isfinited reports a non-zero result.
// Note that the double path uses __isfinited rather than a __finite
// counterpart of the float helper.
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isfinite(double __x) {
  return ::__isfinited(__x) != 0;
}
// isnan for float: true when ::__isnanf reports a non-zero result.
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isnan(float __x) {
  return ::__isnanf(__x) != 0;
}

// isnan for double: true when ::__isnan reports a non-zero result.
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isnan(double __x) {
  return ::__isnan(__x) != 0;
}


// isgreater for float: wraps __builtin_isgreater(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isgreater(float __x, float __y) {
  return __builtin_isgreater(__x, __y) != 0;
}

// isgreater for double: wraps __builtin_isgreater(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isgreater(double __x, double __y) {
  return __builtin_isgreater(__x, __y) != 0;
}
// isgreaterequal for float: wraps __builtin_isgreaterequal(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isgreaterequal(float __x, float __y) {
  return __builtin_isgreaterequal(__x, __y) != 0;
}

// isgreaterequal for double: wraps __builtin_isgreaterequal(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isgreaterequal(double __x, double __y) {
  return __builtin_isgreaterequal(__x, __y) != 0;
}
// isless for float: wraps __builtin_isless(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isless(float __x, float __y) {
  return __builtin_isless(__x, __y) != 0;
}

// isless for double: wraps __builtin_isless(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isless(double __x, double __y) {
  return __builtin_isless(__x, __y) != 0;
}
// islessequal for float: wraps __builtin_islessequal(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool islessequal(float __x, float __y) {
  return __builtin_islessequal(__x, __y) != 0;
}

// islessequal for double: wraps __builtin_islessequal(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool islessequal(double __x, double __y) {
  return __builtin_islessequal(__x, __y) != 0;
}
// islessgreater for float: wraps __builtin_islessgreater(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool islessgreater(float __x, float __y) {
  return __builtin_islessgreater(__x, __y) != 0;
}

// islessgreater for double: wraps __builtin_islessgreater(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool islessgreater(double __x, double __y) {
  return __builtin_islessgreater(__x, __y) != 0;
}
// isnormal for float: wraps __builtin_isnormal.
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isnormal(float __x) {
  return __builtin_isnormal(__x) != 0;
}

// isnormal for double: wraps __builtin_isnormal.
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isnormal(double __x) {
  return __builtin_isnormal(__x) != 0;
}
// isunordered for float: wraps __builtin_isunordered(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isunordered(float __x, float __y) {
  return __builtin_isunordered(__x, __y) != 0;
}

// isunordered for double: wraps __builtin_isunordered(__x, __y).
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool isunordered(double __x, double __y) {
  return __builtin_isunordered(__x, __y) != 0;
}
// float overload of ldexp for device code; forwards the value and the
// exponent to ::ldexpf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float ldexp(float __arg, int __exp) {
  return ::ldexpf(__arg, __exp);
}
// float overload of log for device code; forwards to ::logf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float log(float __x) {
  return ::logf(__x);
}
// float overload of log10 for device code; forwards to ::log10f.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float log10(float __x) {
  return ::log10f(__x);
}
// float overload of modf for device code; forwards the value and the
// integral-part output pointer to ::modff.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float modf(float __x, float *__iptr) {
  return ::modff(__x, __iptr);
}
// pow(float, float): forwards to ::powf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float pow(float __base, float __exp) {
  return ::powf(__base, __exp);
}

// pow(float, int): integer-exponent variant, forwards to ::powif.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float pow(float __base, int __iexp) {
  return ::powif(__base, __iexp);
}

// pow(double, int): integer-exponent variant, forwards to ::powi.
static __inline__ __attribute__((always_inline)) __attribute__((device))
double pow(double __base, int __iexp) {
  return ::powi(__base, __iexp);
}
// signbit for float: true when ::__signbitf reports a non-zero result.
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool signbit(float __x) {
  return ::__signbitf(__x) != 0;
}

// signbit for double: true when ::__signbitd reports a non-zero result.
static __inline__ __attribute__((always_inline)) __attribute__((device))
bool signbit(double __x) {
  return ::__signbitd(__x) != 0;
}
// float overload of sin for device code; forwards to ::sinf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float sin(float __x) {
  return ::sinf(__x);
}
// float overload of sinh for device code; forwards to ::sinhf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float sinh(float __x) {
  return ::sinhf(__x);
}
// float overload of sqrt for device code; forwards to ::sqrtf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float sqrt(float __x) {
  return ::sqrtf(__x);
}
// float overload of tan for device code; forwards to ::tanf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float tan(float __x) {
  return ::tanf(__x);
}
// float overload of tanh for device code; forwards to ::tanhf.
static __inline__ __attribute__((always_inline)) __attribute__((device))
float tanh(float __x) {
  return ::tanhf(__x);
}
# 208 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_cmath.h" 3
// Local enable_if-style helper. The primary template is an empty struct, so
// naming ::type on it is an error and removes an overload from consideration
// when the condition is false.
template <bool __Cond, class __Type = void>
struct __clang_cuda_enable_if {
};

// Specialization for a true condition: exposes the second template argument
// (void by default) as the nested typedef `type`.
template <class __Type>
struct __clang_cuda_enable_if<true, __Type> {
  typedef __Type type;
};
# 241 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_cmath.h" 3
// Integer-type overload of acos (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::acos.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
acos(__T __x) {
  return ::acos(static_cast<double>(__x));
}
// Integer-type overload of acosh (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::acosh.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
acosh(__T __x) {
  return ::acosh(static_cast<double>(__x));
}
// Integer-type overload of asin (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::asin.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
asin(__T __x) {
  return ::asin(static_cast<double>(__x));
}
// Integer-type overload of asinh (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::asinh.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
asinh(__T __x) {
  return ::asinh(static_cast<double>(__x));
}
// Integer-type overload of atan (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::atan.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
atan(__T __x) {
  return ::atan(static_cast<double>(__x));
}
// Two-argument overload of atan2, enabled when numeric_limits is specialized
// for both argument types: casts both to double and calls atan2 on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                double>::type
atan2(__T1 __x, __T2 __y) {
  return atan2(static_cast<double>(__x), static_cast<double>(__y));
}
// Integer-type overload of atanh (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::atanh.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
atanh(__T __x) {
  return ::atanh(static_cast<double>(__x));
}
// Integer-type overload of cbrt (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::cbrt.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
cbrt(__T __x) {
  return ::cbrt(static_cast<double>(__x));
}
// Integer-type overload of ceil (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::ceil.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
ceil(__T __x) {
  return ::ceil(static_cast<double>(__x));
}
// Two-argument overload of copysign, enabled when numeric_limits is
// specialized for both argument types: casts both to double and calls
// copysign on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                double>::type
copysign(__T1 __x, __T2 __y) {
  return copysign(static_cast<double>(__x), static_cast<double>(__y));
}
// Integer-type overload of cos (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::cos.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
cos(__T __x) {
  return ::cos(static_cast<double>(__x));
}
// Integer-type overload of cosh (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::cosh.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
cosh(__T __x) {
  return ::cosh(static_cast<double>(__x));
}
// Integer-type overload of erf (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::erf.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
erf(__T __x) {
  return ::erf(static_cast<double>(__x));
}
// Integer-type overload of erfc (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::erfc.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
erfc(__T __x) {
  return ::erfc(static_cast<double>(__x));
}
// Integer-type overload of exp (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::exp.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
exp(__T __x) {
  return ::exp(static_cast<double>(__x));
}
// Integer-type overload of exp2 (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::exp2.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
exp2(__T __x) {
  return ::exp2(static_cast<double>(__x));
}
// Integer-type overload of expm1 (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::expm1.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
expm1(__T __x) {
  return ::expm1(static_cast<double>(__x));
}
// Integer-type overload of fabs (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::fabs.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
fabs(__T __x) {
  return ::fabs(static_cast<double>(__x));
}
// Two-argument overload of fdim, enabled when numeric_limits is specialized
// for both argument types: casts both to double and calls fdim on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                double>::type
fdim(__T1 __x, __T2 __y) {
  return fdim(static_cast<double>(__x), static_cast<double>(__y));
}
// Integer-type overload of floor (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::floor.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
floor(__T __x) {
  return ::floor(static_cast<double>(__x));
}
// Two-argument overload of fmax, enabled when numeric_limits is specialized
// for both argument types: casts both to double and calls fmax on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                double>::type
fmax(__T1 __x, __T2 __y) {
  return fmax(static_cast<double>(__x), static_cast<double>(__y));
}
// Two-argument overload of fmin, enabled when numeric_limits is specialized
// for both argument types: casts both to double and calls fmin on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                double>::type
fmin(__T1 __x, __T2 __y) {
  return fmin(static_cast<double>(__x), static_cast<double>(__y));
}
// Two-argument overload of fmod, enabled when numeric_limits is specialized
// for both argument types: casts both to double and calls fmod on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                double>::type
fmod(__T1 __x, __T2 __y) {
  return fmod(static_cast<double>(__x), static_cast<double>(__y));
}
// Integer-type overload of fpclassify (enabled via
// numeric_limits<__T>::is_integer): casts the argument to double and returns
// the int result of ::fpclassify.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, int>::type
fpclassify(__T __x) {
  return ::fpclassify(static_cast<double>(__x));
}
// Two-argument overload of hypot, enabled when numeric_limits is specialized
// for both argument types: casts both to double and calls hypot on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                double>::type
hypot(__T1 __x, __T2 __y) {
  return hypot(static_cast<double>(__x), static_cast<double>(__y));
}
// Integer-type overload of ilogb (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and returns the int result of ::ilogb.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, int>::type
ilogb(__T __x) {
  return ::ilogb(static_cast<double>(__x));
}
// Integer-type overload of isfinite (enabled via
// numeric_limits<__T>::is_integer): casts the argument to double and returns
// the bool result of ::isfinite.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, bool>::type
isfinite(__T __x) {
  return ::isfinite(static_cast<double>(__x));
}
// Two-argument overload of isgreater, enabled when numeric_limits is
// specialized for both argument types: casts both to double and returns the
// bool result of isgreater on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                bool>::type
isgreater(__T1 __x, __T2 __y) {
  return isgreater(static_cast<double>(__x), static_cast<double>(__y));
}
// Two-argument overload of isgreaterequal, enabled when numeric_limits is
// specialized for both argument types: casts both to double and returns the
// bool result of isgreaterequal on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                bool>::type
isgreaterequal(__T1 __x, __T2 __y) {
  return isgreaterequal(static_cast<double>(__x), static_cast<double>(__y));
}
// Integer-type overload of isinf (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and returns the bool result of ::isinf.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, bool>::type
isinf(__T __x) {
  return ::isinf(static_cast<double>(__x));
}
// Two-argument overload of isless, enabled when numeric_limits is
// specialized for both argument types: casts both to double and returns the
// bool result of isless on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                bool>::type
isless(__T1 __x, __T2 __y) {
  return isless(static_cast<double>(__x), static_cast<double>(__y));
}
// Two-argument overload of islessequal, enabled when numeric_limits is
// specialized for both argument types: casts both to double and returns the
// bool result of islessequal on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                bool>::type
islessequal(__T1 __x, __T2 __y) {
  return islessequal(static_cast<double>(__x), static_cast<double>(__y));
}
// Two-argument overload of islessgreater, enabled when numeric_limits is
// specialized for both argument types: casts both to double and returns the
// bool result of islessgreater on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                bool>::type
islessgreater(__T1 __x, __T2 __y) {
  return islessgreater(static_cast<double>(__x), static_cast<double>(__y));
}
// Integer-type overload of isnan (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and returns the bool result of ::isnan.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, bool>::type
isnan(__T __x) {
  return ::isnan(static_cast<double>(__x));
}
// Integer-type overload of isnormal (enabled via
// numeric_limits<__T>::is_integer): casts the argument to double and returns
// the bool result of ::isnormal.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, bool>::type
isnormal(__T __x) {
  return ::isnormal(static_cast<double>(__x));
}
// Two-argument overload of isunordered, enabled when numeric_limits is
// specialized for both argument types: casts both to double and returns the
// bool result of isunordered on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                bool>::type
isunordered(__T1 __x, __T2 __y) {
  return isunordered(static_cast<double>(__x), static_cast<double>(__y));
}
// Integer-type overload of lgamma (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::lgamma.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
lgamma(__T __x) {
  return ::lgamma(static_cast<double>(__x));
}
// Integer-type overload of log (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::log.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
log(__T __x) {
  return ::log(static_cast<double>(__x));
}
// Integer-type overload of log10 (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::log10.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
log10(__T __x) {
  return ::log10(static_cast<double>(__x));
}
// Integer-type overload of log1p (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::log1p.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
log1p(__T __x) {
  return ::log1p(static_cast<double>(__x));
}
// Integer-type overload of log2 (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::log2.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
log2(__T __x) {
  return ::log2(static_cast<double>(__x));
}
// Integer-type overload of logb (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::logb.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
logb(__T __x) {
  return ::logb(static_cast<double>(__x));
}
// Integer-type overload of llrint (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and returns the long long result of ::llrint.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, long long>::type
llrint(__T __x) {
  return ::llrint(static_cast<double>(__x));
}
// Integer-type overload of llround (enabled via
// numeric_limits<__T>::is_integer): casts the argument to double and returns
// the long long result of ::llround.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, long long>::type
llround(__T __x) {
  return ::llround(static_cast<double>(__x));
}
// Integer-type overload of lrint (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and returns the long result of ::lrint.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, long>::type
lrint(__T __x) {
  return ::lrint(static_cast<double>(__x));
}
// Integer-type overload of lround (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and returns the long result of ::lround.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, long>::type
lround(__T __x) {
  return ::lround(static_cast<double>(__x));
}
// Integer-type overload of nearbyint (enabled via
// numeric_limits<__T>::is_integer): casts the argument to double and forwards
// to ::nearbyint.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
nearbyint(__T __x) {
  return ::nearbyint(static_cast<double>(__x));
}
// Two-argument overload of nextafter, enabled when numeric_limits is
// specialized for both argument types: casts both to double and calls
// nextafter on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                double>::type
nextafter(__T1 __x, __T2 __y) {
  return nextafter(static_cast<double>(__x), static_cast<double>(__y));
}
// Two-argument overload of pow, enabled when numeric_limits is specialized
// for both argument types: casts both to double and calls pow on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                double>::type
pow(__T1 __x, __T2 __y) {
  return pow(static_cast<double>(__x), static_cast<double>(__y));
}
// Two-argument overload of remainder, enabled when numeric_limits is
// specialized for both argument types: casts both to double and calls
// remainder on them.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                double>::type
remainder(__T1 __x, __T2 __y) {
  return remainder(static_cast<double>(__x), static_cast<double>(__y));
}
// Integer-type overload of rint (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::rint.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
rint(__T __x) {
  return ::rint(static_cast<double>(__x));
}
// Integer-type overload of round (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::round.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
round(__T __x) {
  return ::round(static_cast<double>(__x));
}
// Integer-type overload of signbit (enabled via
// numeric_limits<__T>::is_integer): casts the argument to double and returns
// the bool result of ::signbit.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, bool>::type
signbit(__T __x) {
  return ::signbit(static_cast<double>(__x));
}
// Integer-type overload of sin (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::sin.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
sin(__T __x) {
  return ::sin(static_cast<double>(__x));
}
// Integer-type overload of sinh (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::sinh.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
sinh(__T __x) {
  return ::sinh(static_cast<double>(__x));
}
// Integer-type overload of sqrt (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::sqrt.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
sqrt(__T __x) {
  return ::sqrt(static_cast<double>(__x));
}
// Integer-type overload of tan (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::tan.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
tan(__T __x) {
  return ::tan(static_cast<double>(__x));
}
// Integer-type overload of tanh (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::tanh.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
tanh(__T __x) {
  return ::tanh(static_cast<double>(__x));
}
// Integer-type overload of tgamma (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::tgamma.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
tgamma(__T __x) {
  return ::tgamma(static_cast<double>(__x));
}
// Integer-type overload of trunc (enabled via numeric_limits<__T>::is_integer):
// casts the argument to double and forwards to ::trunc.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
trunc(__T __x) {
  return ::trunc(static_cast<double>(__x));
}


// Three-argument overload of fma, enabled when numeric_limits is specialized
// for all three argument types: each argument is cast to double and the
// doubles are handed to std::fma.
template <class __T1, class __T2, class __T3>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized &&
                                    std::numeric_limits<__T3>::is_specialized,
                                double>::type
fma(__T1 __x, __T2 __y, __T3 __z) {
  return std::fma(static_cast<double>(__x), static_cast<double>(__y),
                  static_cast<double>(__z));
}

// Integer-type overload of frexp (enabled via numeric_limits<__T>::is_integer):
// casts the value to double and forwards it, with the exponent output
// pointer, to std::frexp.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
frexp(__T __x, int *__exp) {
  return std::frexp(static_cast<double>(__x), __exp);
}

// Integer-type overload of ldexp (enabled via numeric_limits<__T>::is_integer):
// casts the value to double and forwards it, with the exponent, to
// std::ldexp.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
ldexp(__T __x, int __exp) {
  return std::ldexp(static_cast<double>(__x), __exp);
}

// Mixed-type overload of remquo, enabled when numeric_limits is specialized
// for both value types: casts both values to double and forwards them, with
// the quotient output pointer, to std::remquo.
template <class __T1, class __T2>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T1>::is_specialized &&
                                    std::numeric_limits<__T2>::is_specialized,
                                double>::type
remquo(__T1 __x, __T2 __y, int *__quo) {
  return std::remquo(static_cast<double>(__x), static_cast<double>(__y),
                     __quo);
}

// Integer-type overload of scalbln (enabled via
// numeric_limits<__T>::is_integer): casts the value to double and forwards
// it, with the long exponent, to std::scalbln.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
scalbln(__T __x, long __exp) {
  return std::scalbln(static_cast<double>(__x), __exp);
}

// Integer-type overload of scalbn (enabled via numeric_limits<__T>::is_integer):
// casts the value to double and forwards it, with the int exponent, to
// std::scalbn.
template <class __T>
static __inline__ __attribute__((always_inline)) __attribute__((device))
typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, double>::type
scalbn(__T __x, int __exp) {
  return std::scalbn(static_cast<double>(__x), __exp);
}
# 361 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_cmath.h" 3
// Re-export the global-namespace math functions into namespace std through
// using-declarations, so that std::name refers to the same overload set as
// ::name.
namespace std {


// Overloaded names (the same identifiers as the overloads defined above at
// global scope).
// NOTE(review): isinf and isnan have global overloads earlier in this file
// but are not re-exported in this list; the reason is not visible here.
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnormal;
using ::isunordered;
using ::ldexp;
using ::lgamma;
using ::llrint;
using ::llround;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::nearbyint;
using ::nextafter;
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;
# 443 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_cmath.h" 3
// 'f'-suffixed names (acosf, asinf, ...), likewise made reachable as
// std::name.
using ::acosf;
using ::acoshf;
using ::asinf;
using ::asinhf;
using ::atan2f;
using ::atanf;
using ::atanhf;
using ::cbrtf;
using ::ceilf;
using ::copysignf;
using ::cosf;
using ::coshf;
using ::erfcf;
using ::erff;
using ::exp2f;
using ::expf;
using ::expm1f;
using ::fabsf;
using ::fdimf;
using ::floorf;
using ::fmaf;
using ::fmaxf;
using ::fminf;
using ::fmodf;
using ::frexpf;
using ::hypotf;
using ::ilogbf;
using ::ldexpf;
using ::lgammaf;
using ::llrintf;
using ::llroundf;
using ::log10f;
using ::log1pf;
using ::log2f;
using ::logbf;
using ::logf;
using ::lrintf;
using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
using ::powf;
using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
using ::scalblnf;
using ::scalbnf;
using ::sinf;
using ::sinhf;
using ::sqrtf;
using ::tanf;
using ::tanhf;
using ::tgammaf;
using ::truncf;


}
# 473 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_intrinsics.h" 1 3
# 88 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_intrinsics.h" 3
// Overload set of the mask-less indexed warp shuffle `__shfl`.
//
// The int and float overloads call __nvvm_shfl_idx_{i32,f32} with the control
// word ((warpSize - __width) << 8) | 0x1f. Every other overload forwards to
// one of those two: 64-bit values are shuffled as two independent 32-bit
// halves, and unsigned / long / double values are converted to the matching
// signed integer overload and back.

// int: direct builtin call.
inline __attribute__((device)) int __shfl(int __val, int __offset,
                                          int __width = warpSize) {
  return __nvvm_shfl_idx_i32(__val, __offset,
                             ((warpSize - __width) << 8) | (0x1f));
}

// float: direct builtin call.
inline __attribute__((device)) float __shfl(float __val, int __offset,
                                            int __width = warpSize) {
  return __nvvm_shfl_idx_f32(__val, __offset,
                             ((warpSize - __width) << 8) | (0x1f));
}

// unsigned int: routed through the int overload.
inline __attribute__((device)) unsigned int
__shfl(unsigned int __val, int __offset, int __width = warpSize) {
  return static_cast<unsigned int>(
      ::__shfl(static_cast<int>(__val), __offset, __width));
}

// long long: copied into two ints, each half shuffled separately, then
// reassembled with memcpy.
inline __attribute__((device)) long long
__shfl(long long __val, int __offset, int __width = warpSize) {
  struct __Bits {
    int __a, __b;
  };
  _Static_assert(sizeof(__val) == sizeof(__Bits));
  _Static_assert(sizeof(__Bits) == 2 * sizeof(int));
  __Bits __tmp;
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp.__a = ::__shfl(__tmp.__a, __offset, __width);
  __tmp.__b = ::__shfl(__tmp.__b, __offset, __width);
  long long __ret;
  memcpy(&__ret, &__tmp, sizeof(__tmp));
  return __ret;
}

// long: dispatches on sizeof(long) to the long long or int overload.
inline __attribute__((device)) long __shfl(long __val, int __offset,
                                           int __width = warpSize) {
  _Static_assert(sizeof(long) == sizeof(long long) ||
                 sizeof(long) == sizeof(int));
  if (sizeof(long) == sizeof(long long)) {
    return static_cast<long>(
        ::__shfl(static_cast<long long>(__val), __offset, __width));
  } else {
    // The static assertion above leaves sizeof(long) == sizeof(int) as the
    // only remaining case, so every path returns a value.
    return static_cast<long>(
        ::__shfl(static_cast<int>(__val), __offset, __width));
  }
}

// unsigned long: routed through the long overload.
inline __attribute__((device)) unsigned long
__shfl(unsigned long __val, int __offset, int __width = warpSize) {
  return static_cast<unsigned long>(
      ::__shfl(static_cast<long>(__val), __offset, __width));
}

// unsigned long long: routed through the long long overload. The argument
// must be cast to the *signed* type; casting to unsigned long long would make
// this overload select itself and recurse without bound.
inline __attribute__((device)) unsigned long long
__shfl(unsigned long long __val, int __offset, int __width = warpSize) {
  return static_cast<unsigned long long>(
      ::__shfl(static_cast<long long>(__val), __offset, __width));
}

// double: bit pattern copied into a long long, shuffled, and copied back.
inline __attribute__((device)) double __shfl(double __val, int __offset,
                                             int __width = warpSize) {
  long long __tmp;
  _Static_assert(sizeof(__tmp) == sizeof(__val));
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp = ::__shfl(__tmp, __offset, __width);
  double __ret;
  memcpy(&__ret, &__tmp, sizeof(__ret));
  return __ret;
}


// Overload set of the mask-less warp shuffle `__shfl_up`.
//
// The int and float overloads call __nvvm_shfl_up_{i32,f32} with the control
// word ((warpSize - __width) << 8) | 0. Every other overload forwards to one
// of those two: 64-bit values are shuffled as two independent 32-bit halves,
// and unsigned / long / double values are converted to the matching signed
// integer overload and back.

// int: direct builtin call.
inline __attribute__((device)) int __shfl_up(int __val, unsigned int __offset,
                                             int __width = warpSize) {
  return __nvvm_shfl_up_i32(__val, __offset,
                            ((warpSize - __width) << 8) | (0));
}

// float: direct builtin call.
inline __attribute__((device)) float
__shfl_up(float __val, unsigned int __offset, int __width = warpSize) {
  return __nvvm_shfl_up_f32(__val, __offset,
                            ((warpSize - __width) << 8) | (0));
}

// unsigned int: routed through the int overload.
inline __attribute__((device)) unsigned int
__shfl_up(unsigned int __val, unsigned int __offset, int __width = warpSize) {
  return static_cast<unsigned int>(
      ::__shfl_up(static_cast<int>(__val), __offset, __width));
}

// long long: copied into two ints, each half shuffled separately, then
// reassembled with memcpy.
inline __attribute__((device)) long long
__shfl_up(long long __val, unsigned int __offset, int __width = warpSize) {
  struct __Bits {
    int __a, __b;
  };
  _Static_assert(sizeof(__val) == sizeof(__Bits));
  _Static_assert(sizeof(__Bits) == 2 * sizeof(int));
  __Bits __tmp;
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp.__a = ::__shfl_up(__tmp.__a, __offset, __width);
  __tmp.__b = ::__shfl_up(__tmp.__b, __offset, __width);
  long long __ret;
  memcpy(&__ret, &__tmp, sizeof(__tmp));
  return __ret;
}

// long: dispatches on sizeof(long) to the long long or int overload.
inline __attribute__((device)) long
__shfl_up(long __val, unsigned int __offset, int __width = warpSize) {
  _Static_assert(sizeof(long) == sizeof(long long) ||
                 sizeof(long) == sizeof(int));
  if (sizeof(long) == sizeof(long long)) {
    return static_cast<long>(
        ::__shfl_up(static_cast<long long>(__val), __offset, __width));
  } else {
    // The static assertion above leaves sizeof(long) == sizeof(int) as the
    // only remaining case, so every path returns a value.
    return static_cast<long>(
        ::__shfl_up(static_cast<int>(__val), __offset, __width));
  }
}

// unsigned long: routed through the long overload.
inline __attribute__((device)) unsigned long
__shfl_up(unsigned long __val, unsigned int __offset, int __width = warpSize) {
  return static_cast<unsigned long>(
      ::__shfl_up(static_cast<long>(__val), __offset, __width));
}

// unsigned long long: routed through the long long overload. The argument
// must be cast to the *signed* type; casting to unsigned long long would make
// this overload select itself and recurse without bound.
inline __attribute__((device)) unsigned long long
__shfl_up(unsigned long long __val, unsigned int __offset,
          int __width = warpSize) {
  return static_cast<unsigned long long>(
      ::__shfl_up(static_cast<long long>(__val), __offset, __width));
}

// double: bit pattern copied into a long long, shuffled, and copied back.
inline __attribute__((device)) double
__shfl_up(double __val, unsigned int __offset, int __width = warpSize) {
  long long __tmp;
  _Static_assert(sizeof(__tmp) == sizeof(__val));
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp = ::__shfl_up(__tmp, __offset, __width);
  double __ret;
  memcpy(&__ret, &__tmp, sizeof(__ret));
  return __ret;
}

// Overload set of the mask-less warp shuffle `__shfl_down`.
//
// The int and float overloads call __nvvm_shfl_down_{i32,f32} with the
// control word ((warpSize - __width) << 8) | 0x1f. Every other overload
// forwards to one of those two: 64-bit values are shuffled as two independent
// 32-bit halves, and unsigned / long / double values are converted to the
// matching signed integer overload and back.

// int: direct builtin call.
inline __attribute__((device)) int
__shfl_down(int __val, unsigned int __offset, int __width = warpSize) {
  return __nvvm_shfl_down_i32(__val, __offset,
                              ((warpSize - __width) << 8) | (0x1f));
}

// float: direct builtin call.
inline __attribute__((device)) float
__shfl_down(float __val, unsigned int __offset, int __width = warpSize) {
  return __nvvm_shfl_down_f32(__val, __offset,
                              ((warpSize - __width) << 8) | (0x1f));
}

// unsigned int: routed through the int overload.
inline __attribute__((device)) unsigned int
__shfl_down(unsigned int __val, unsigned int __offset,
            int __width = warpSize) {
  return static_cast<unsigned int>(
      ::__shfl_down(static_cast<int>(__val), __offset, __width));
}

// long long: copied into two ints, each half shuffled separately, then
// reassembled with memcpy.
inline __attribute__((device)) long long
__shfl_down(long long __val, unsigned int __offset, int __width = warpSize) {
  struct __Bits {
    int __a, __b;
  };
  _Static_assert(sizeof(__val) == sizeof(__Bits));
  _Static_assert(sizeof(__Bits) == 2 * sizeof(int));
  __Bits __tmp;
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp.__a = ::__shfl_down(__tmp.__a, __offset, __width);
  __tmp.__b = ::__shfl_down(__tmp.__b, __offset, __width);
  long long __ret;
  memcpy(&__ret, &__tmp, sizeof(__tmp));
  return __ret;
}

// long: dispatches on sizeof(long) to the long long or int overload.
inline __attribute__((device)) long
__shfl_down(long __val, unsigned int __offset, int __width = warpSize) {
  _Static_assert(sizeof(long) == sizeof(long long) ||
                 sizeof(long) == sizeof(int));
  if (sizeof(long) == sizeof(long long)) {
    return static_cast<long>(
        ::__shfl_down(static_cast<long long>(__val), __offset, __width));
  } else {
    // The static assertion above leaves sizeof(long) == sizeof(int) as the
    // only remaining case, so every path returns a value.
    return static_cast<long>(
        ::__shfl_down(static_cast<int>(__val), __offset, __width));
  }
}

// unsigned long: routed through the long overload.
inline __attribute__((device)) unsigned long
__shfl_down(unsigned long __val, unsigned int __offset,
            int __width = warpSize) {
  return static_cast<unsigned long>(
      ::__shfl_down(static_cast<long>(__val), __offset, __width));
}

// unsigned long long: routed through the long long overload. The argument
// must be cast to the *signed* type; casting to unsigned long long would make
// this overload select itself and recurse without bound.
inline __attribute__((device)) unsigned long long
__shfl_down(unsigned long long __val, unsigned int __offset,
            int __width = warpSize) {
  return static_cast<unsigned long long>(
      ::__shfl_down(static_cast<long long>(__val), __offset, __width));
}

// double: bit pattern copied into a long long, shuffled, and copied back.
inline __attribute__((device)) double
__shfl_down(double __val, unsigned int __offset, int __width = warpSize) {
  long long __tmp;
  _Static_assert(sizeof(__tmp) == sizeof(__val));
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp = ::__shfl_down(__tmp, __offset, __width);
  double __ret;
  memcpy(&__ret, &__tmp, sizeof(__ret));
  return __ret;
}

// Overload set of the mask-less warp shuffle `__shfl_xor`.
//
// The int and float overloads call __nvvm_shfl_bfly_{i32,f32} with the
// control word ((warpSize - __width) << 8) | 0x1f. Every other overload
// forwards to one of those two: 64-bit values are shuffled as two independent
// 32-bit halves, and unsigned / long / double values are converted to the
// matching signed integer overload and back.

// int: direct builtin call.
inline __attribute__((device)) int __shfl_xor(int __val, int __offset,
                                              int __width = warpSize) {
  return __nvvm_shfl_bfly_i32(__val, __offset,
                              ((warpSize - __width) << 8) | (0x1f));
}

// float: direct builtin call.
inline __attribute__((device)) float __shfl_xor(float __val, int __offset,
                                                int __width = warpSize) {
  return __nvvm_shfl_bfly_f32(__val, __offset,
                              ((warpSize - __width) << 8) | (0x1f));
}

// unsigned int: routed through the int overload.
inline __attribute__((device)) unsigned int
__shfl_xor(unsigned int __val, int __offset, int __width = warpSize) {
  return static_cast<unsigned int>(
      ::__shfl_xor(static_cast<int>(__val), __offset, __width));
}

// long long: copied into two ints, each half shuffled separately, then
// reassembled with memcpy.
inline __attribute__((device)) long long
__shfl_xor(long long __val, int __offset, int __width = warpSize) {
  struct __Bits {
    int __a, __b;
  };
  _Static_assert(sizeof(__val) == sizeof(__Bits));
  _Static_assert(sizeof(__Bits) == 2 * sizeof(int));
  __Bits __tmp;
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp.__a = ::__shfl_xor(__tmp.__a, __offset, __width);
  __tmp.__b = ::__shfl_xor(__tmp.__b, __offset, __width);
  long long __ret;
  memcpy(&__ret, &__tmp, sizeof(__tmp));
  return __ret;
}

// long: dispatches on sizeof(long) to the long long or int overload.
inline __attribute__((device)) long __shfl_xor(long __val, int __offset,
                                               int __width = warpSize) {
  _Static_assert(sizeof(long) == sizeof(long long) ||
                 sizeof(long) == sizeof(int));
  if (sizeof(long) == sizeof(long long)) {
    return static_cast<long>(
        ::__shfl_xor(static_cast<long long>(__val), __offset, __width));
  } else {
    // The static assertion above leaves sizeof(long) == sizeof(int) as the
    // only remaining case, so every path returns a value.
    return static_cast<long>(
        ::__shfl_xor(static_cast<int>(__val), __offset, __width));
  }
}

// unsigned long: routed through the long overload.
inline __attribute__((device)) unsigned long
__shfl_xor(unsigned long __val, int __offset, int __width = warpSize) {
  return static_cast<unsigned long>(
      ::__shfl_xor(static_cast<long>(__val), __offset, __width));
}

// unsigned long long: routed through the long long overload. The argument
// must be cast to the *signed* type; casting to unsigned long long would make
// this overload select itself and recurse without bound.
inline __attribute__((device)) unsigned long long
__shfl_xor(unsigned long long __val, int __offset, int __width = warpSize) {
  return static_cast<unsigned long long>(
      ::__shfl_xor(static_cast<long long>(__val), __offset, __width));
}

// double: bit pattern copied into a long long, shuffled, and copied back.
inline __attribute__((device)) double __shfl_xor(double __val, int __offset,
                                                 int __width = warpSize) {
  long long __tmp;
  _Static_assert(sizeof(__tmp) == sizeof(__val));
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp = ::__shfl_xor(__tmp, __offset, __width);
  double __ret;
  memcpy(&__ret, &__tmp, sizeof(__ret));
  return __ret;
}
# 173 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_intrinsics.h" 3
// Overload set of the masked indexed warp shuffle `__shfl_sync`.
//
// `__mask` is passed unchanged to the builtin as its first argument. The int
// and float overloads call __nvvm_shfl_sync_idx_{i32,f32} with the control
// word ((warpSize - __width) << 8) | 0x1f. Every other overload forwards to
// one of those two: 64-bit values are shuffled as two independent 32-bit
// halves, and unsigned / long / double values are converted to the matching
// signed integer overload and back.

// int: direct builtin call.
inline __attribute__((device)) int
__shfl_sync(unsigned int __mask, int __val, int __offset,
            int __width = warpSize) {
  return __nvvm_shfl_sync_idx_i32(__mask, __val, __offset,
                                  ((warpSize - __width) << 8) | (0x1f));
}

// float: direct builtin call.
inline __attribute__((device)) float
__shfl_sync(unsigned int __mask, float __val, int __offset,
            int __width = warpSize) {
  return __nvvm_shfl_sync_idx_f32(__mask, __val, __offset,
                                  ((warpSize - __width) << 8) | (0x1f));
}

// unsigned int: routed through the int overload.
inline __attribute__((device)) unsigned int
__shfl_sync(unsigned int __mask, unsigned int __val, int __offset,
            int __width = warpSize) {
  return static_cast<unsigned int>(
      ::__shfl_sync(__mask, static_cast<int>(__val), __offset, __width));
}

// long long: copied into two ints, each half shuffled separately, then
// reassembled with memcpy.
inline __attribute__((device)) long long
__shfl_sync(unsigned int __mask, long long __val, int __offset,
            int __width = warpSize) {
  struct __Bits {
    int __a, __b;
  };
  _Static_assert(sizeof(__val) == sizeof(__Bits));
  _Static_assert(sizeof(__Bits) == 2 * sizeof(int));
  __Bits __tmp;
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp.__a = ::__shfl_sync(__mask, __tmp.__a, __offset, __width);
  __tmp.__b = ::__shfl_sync(__mask, __tmp.__b, __offset, __width);
  long long __ret;
  memcpy(&__ret, &__tmp, sizeof(__tmp));
  return __ret;
}

// unsigned long long: routed through the long long overload. The argument
// must be cast to the *signed* type; casting to unsigned long long would make
// this overload select itself and recurse without bound.
inline __attribute__((device)) unsigned long long
__shfl_sync(unsigned int __mask, unsigned long long __val, int __offset,
            int __width = warpSize) {
  return static_cast<unsigned long long>(
      ::__shfl_sync(__mask, static_cast<long long>(__val), __offset, __width));
}

// long: dispatches on sizeof(long) to the long long or int overload.
inline __attribute__((device)) long
__shfl_sync(unsigned int __mask, long __val, int __offset,
            int __width = warpSize) {
  _Static_assert(sizeof(long) == sizeof(long long) ||
                 sizeof(long) == sizeof(int));
  if (sizeof(long) == sizeof(long long)) {
    return static_cast<long>(::__shfl_sync(
        __mask, static_cast<long long>(__val), __offset, __width));
  } else {
    // The static assertion above leaves sizeof(long) == sizeof(int) as the
    // only remaining case, so every path returns a value.
    return static_cast<long>(
        ::__shfl_sync(__mask, static_cast<int>(__val), __offset, __width));
  }
}

// unsigned long: routed through the long overload.
inline __attribute__((device)) unsigned long
__shfl_sync(unsigned int __mask, unsigned long __val, int __offset,
            int __width = warpSize) {
  return static_cast<unsigned long>(
      ::__shfl_sync(__mask, static_cast<long>(__val), __offset, __width));
}

// double: bit pattern copied into a long long, shuffled, and copied back.
inline __attribute__((device)) double
__shfl_sync(unsigned int __mask, double __val, int __offset,
            int __width = warpSize) {
  long long __tmp;
  _Static_assert(sizeof(__tmp) == sizeof(__val));
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp = ::__shfl_sync(__mask, __tmp, __offset, __width);
  double __ret;
  memcpy(&__ret, &__tmp, sizeof(__ret));
  return __ret;
}


// Overload set of the masked warp shuffle `__shfl_up_sync`.
//
// `__mask` is passed unchanged to the builtin as its first argument. The int
// and float overloads call __nvvm_shfl_sync_up_{i32,f32} with the control
// word ((warpSize - __width) << 8) | 0. Every other overload forwards to one
// of those two: 64-bit values are shuffled as two independent 32-bit halves,
// and unsigned / long / double values are converted to the matching signed
// integer overload and back.

// int: direct builtin call.
inline __attribute__((device)) int
__shfl_up_sync(unsigned int __mask, int __val, unsigned int __offset,
               int __width = warpSize) {
  return __nvvm_shfl_sync_up_i32(__mask, __val, __offset,
                                 ((warpSize - __width) << 8) | (0));
}

// float: direct builtin call.
inline __attribute__((device)) float
__shfl_up_sync(unsigned int __mask, float __val, unsigned int __offset,
               int __width = warpSize) {
  return __nvvm_shfl_sync_up_f32(__mask, __val, __offset,
                                 ((warpSize - __width) << 8) | (0));
}

// unsigned int: routed through the int overload.
inline __attribute__((device)) unsigned int
__shfl_up_sync(unsigned int __mask, unsigned int __val, unsigned int __offset,
               int __width = warpSize) {
  return static_cast<unsigned int>(
      ::__shfl_up_sync(__mask, static_cast<int>(__val), __offset, __width));
}

// long long: copied into two ints, each half shuffled separately, then
// reassembled with memcpy.
inline __attribute__((device)) long long
__shfl_up_sync(unsigned int __mask, long long __val, unsigned int __offset,
               int __width = warpSize) {
  struct __Bits {
    int __a, __b;
  };
  _Static_assert(sizeof(__val) == sizeof(__Bits));
  _Static_assert(sizeof(__Bits) == 2 * sizeof(int));
  __Bits __tmp;
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp.__a = ::__shfl_up_sync(__mask, __tmp.__a, __offset, __width);
  __tmp.__b = ::__shfl_up_sync(__mask, __tmp.__b, __offset, __width);
  long long __ret;
  memcpy(&__ret, &__tmp, sizeof(__tmp));
  return __ret;
}

// unsigned long long: routed through the long long overload. The argument
// must be cast to the *signed* type; casting to unsigned long long would make
// this overload select itself and recurse without bound.
inline __attribute__((device)) unsigned long long
__shfl_up_sync(unsigned int __mask, unsigned long long __val,
               unsigned int __offset, int __width = warpSize) {
  return static_cast<unsigned long long>(::__shfl_up_sync(
      __mask, static_cast<long long>(__val), __offset, __width));
}

// long: dispatches on sizeof(long) to the long long or int overload.
inline __attribute__((device)) long
__shfl_up_sync(unsigned int __mask, long __val, unsigned int __offset,
               int __width = warpSize) {
  _Static_assert(sizeof(long) == sizeof(long long) ||
                 sizeof(long) == sizeof(int));
  if (sizeof(long) == sizeof(long long)) {
    return static_cast<long>(::__shfl_up_sync(
        __mask, static_cast<long long>(__val), __offset, __width));
  } else {
    // The static assertion above leaves sizeof(long) == sizeof(int) as the
    // only remaining case, so every path returns a value.
    return static_cast<long>(
        ::__shfl_up_sync(__mask, static_cast<int>(__val), __offset, __width));
  }
}

// unsigned long: routed through the long overload.
inline __attribute__((device)) unsigned long
__shfl_up_sync(unsigned int __mask, unsigned long __val, unsigned int __offset,
               int __width = warpSize) {
  return static_cast<unsigned long>(
      ::__shfl_up_sync(__mask, static_cast<long>(__val), __offset, __width));
}

// double: bit pattern copied into a long long, shuffled, and copied back.
inline __attribute__((device)) double
__shfl_up_sync(unsigned int __mask, double __val, unsigned int __offset,
               int __width = warpSize) {
  long long __tmp;
  _Static_assert(sizeof(__tmp) == sizeof(__val));
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp = ::__shfl_up_sync(__mask, __tmp, __offset, __width);
  double __ret;
  memcpy(&__ret, &__tmp, sizeof(__ret));
  return __ret;
}

// Overload set of the masked warp shuffle `__shfl_down_sync`.
//
// `__mask` is passed unchanged to the builtin as its first argument. The int
// and float overloads call __nvvm_shfl_sync_down_{i32,f32} with the control
// word ((warpSize - __width) << 8) | 0x1f. Every other overload forwards to
// one of those two: 64-bit values are shuffled as two independent 32-bit
// halves, and unsigned / long / double values are converted to the matching
// signed integer overload and back.

// int: direct builtin call.
inline __attribute__((device)) int
__shfl_down_sync(unsigned int __mask, int __val, unsigned int __offset,
                 int __width = warpSize) {
  return __nvvm_shfl_sync_down_i32(__mask, __val, __offset,
                                   ((warpSize - __width) << 8) | (0x1f));
}

// float: direct builtin call.
inline __attribute__((device)) float
__shfl_down_sync(unsigned int __mask, float __val, unsigned int __offset,
                 int __width = warpSize) {
  return __nvvm_shfl_sync_down_f32(__mask, __val, __offset,
                                   ((warpSize - __width) << 8) | (0x1f));
}

// unsigned int: routed through the int overload.
inline __attribute__((device)) unsigned int
__shfl_down_sync(unsigned int __mask, unsigned int __val,
                 unsigned int __offset, int __width = warpSize) {
  return static_cast<unsigned int>(
      ::__shfl_down_sync(__mask, static_cast<int>(__val), __offset, __width));
}

// long long: copied into two ints, each half shuffled separately, then
// reassembled with memcpy.
inline __attribute__((device)) long long
__shfl_down_sync(unsigned int __mask, long long __val, unsigned int __offset,
                 int __width = warpSize) {
  struct __Bits {
    int __a, __b;
  };
  _Static_assert(sizeof(__val) == sizeof(__Bits));
  _Static_assert(sizeof(__Bits) == 2 * sizeof(int));
  __Bits __tmp;
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp.__a = ::__shfl_down_sync(__mask, __tmp.__a, __offset, __width);
  __tmp.__b = ::__shfl_down_sync(__mask, __tmp.__b, __offset, __width);
  long long __ret;
  memcpy(&__ret, &__tmp, sizeof(__tmp));
  return __ret;
}

// unsigned long long: routed through the long long overload. The argument
// must be cast to the *signed* type; casting to unsigned long long would make
// this overload select itself and recurse without bound.
inline __attribute__((device)) unsigned long long
__shfl_down_sync(unsigned int __mask, unsigned long long __val,
                 unsigned int __offset, int __width = warpSize) {
  return static_cast<unsigned long long>(::__shfl_down_sync(
      __mask, static_cast<long long>(__val), __offset, __width));
}

// long: dispatches on sizeof(long) to the long long or int overload.
inline __attribute__((device)) long
__shfl_down_sync(unsigned int __mask, long __val, unsigned int __offset,
                 int __width = warpSize) {
  _Static_assert(sizeof(long) == sizeof(long long) ||
                 sizeof(long) == sizeof(int));
  if (sizeof(long) == sizeof(long long)) {
    return static_cast<long>(::__shfl_down_sync(
        __mask, static_cast<long long>(__val), __offset, __width));
  } else {
    // The static assertion above leaves sizeof(long) == sizeof(int) as the
    // only remaining case, so every path returns a value.
    return static_cast<long>(::__shfl_down_sync(
        __mask, static_cast<int>(__val), __offset, __width));
  }
}

// unsigned long: routed through the long overload.
inline __attribute__((device)) unsigned long
__shfl_down_sync(unsigned int __mask, unsigned long __val,
                 unsigned int __offset, int __width = warpSize) {
  return static_cast<unsigned long>(
      ::__shfl_down_sync(__mask, static_cast<long>(__val), __offset, __width));
}

// double: bit pattern copied into a long long, shuffled, and copied back.
inline __attribute__((device)) double
__shfl_down_sync(unsigned int __mask, double __val, unsigned int __offset,
                 int __width = warpSize) {
  long long __tmp;
  _Static_assert(sizeof(__tmp) == sizeof(__val));
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp = ::__shfl_down_sync(__mask, __tmp, __offset, __width);
  double __ret;
  memcpy(&__ret, &__tmp, sizeof(__ret));
  return __ret;
}

// Overload set of the masked warp shuffle `__shfl_xor_sync`.
//
// `__mask` is passed unchanged to the builtin as its first argument. The int
// and float overloads call __nvvm_shfl_sync_bfly_{i32,f32} with the control
// word ((warpSize - __width) << 8) | 0x1f. Every other overload forwards to
// one of those two: 64-bit values are shuffled as two independent 32-bit
// halves, and unsigned / long / double values are converted to the matching
// signed integer overload and back.

// int: direct builtin call.
inline __attribute__((device)) int
__shfl_xor_sync(unsigned int __mask, int __val, int __offset,
                int __width = warpSize) {
  return __nvvm_shfl_sync_bfly_i32(__mask, __val, __offset,
                                   ((warpSize - __width) << 8) | (0x1f));
}

// float: direct builtin call.
inline __attribute__((device)) float
__shfl_xor_sync(unsigned int __mask, float __val, int __offset,
                int __width = warpSize) {
  return __nvvm_shfl_sync_bfly_f32(__mask, __val, __offset,
                                   ((warpSize - __width) << 8) | (0x1f));
}

// unsigned int: routed through the int overload.
inline __attribute__((device)) unsigned int
__shfl_xor_sync(unsigned int __mask, unsigned int __val, int __offset,
                int __width = warpSize) {
  return static_cast<unsigned int>(
      ::__shfl_xor_sync(__mask, static_cast<int>(__val), __offset, __width));
}

// long long: copied into two ints, each half shuffled separately, then
// reassembled with memcpy.
inline __attribute__((device)) long long
__shfl_xor_sync(unsigned int __mask, long long __val, int __offset,
                int __width = warpSize) {
  struct __Bits {
    int __a, __b;
  };
  _Static_assert(sizeof(__val) == sizeof(__Bits));
  _Static_assert(sizeof(__Bits) == 2 * sizeof(int));
  __Bits __tmp;
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp.__a = ::__shfl_xor_sync(__mask, __tmp.__a, __offset, __width);
  __tmp.__b = ::__shfl_xor_sync(__mask, __tmp.__b, __offset, __width);
  long long __ret;
  memcpy(&__ret, &__tmp, sizeof(__tmp));
  return __ret;
}

// unsigned long long: routed through the long long overload. The argument
// must be cast to the *signed* type; casting to unsigned long long would make
// this overload select itself and recurse without bound.
inline __attribute__((device)) unsigned long long
__shfl_xor_sync(unsigned int __mask, unsigned long long __val, int __offset,
                int __width = warpSize) {
  return static_cast<unsigned long long>(::__shfl_xor_sync(
      __mask, static_cast<long long>(__val), __offset, __width));
}

// long: dispatches on sizeof(long) to the long long or int overload.
inline __attribute__((device)) long
__shfl_xor_sync(unsigned int __mask, long __val, int __offset,
                int __width = warpSize) {
  _Static_assert(sizeof(long) == sizeof(long long) ||
                 sizeof(long) == sizeof(int));
  if (sizeof(long) == sizeof(long long)) {
    return static_cast<long>(::__shfl_xor_sync(
        __mask, static_cast<long long>(__val), __offset, __width));
  } else {
    // The static assertion above leaves sizeof(long) == sizeof(int) as the
    // only remaining case, so every path returns a value.
    return static_cast<long>(
        ::__shfl_xor_sync(__mask, static_cast<int>(__val), __offset, __width));
  }
}

// unsigned long: routed through the long overload.
inline __attribute__((device)) unsigned long
__shfl_xor_sync(unsigned int __mask, unsigned long __val, int __offset,
                int __width = warpSize) {
  return static_cast<unsigned long>(
      ::__shfl_xor_sync(__mask, static_cast<long>(__val), __offset, __width));
}

// double: bit pattern copied into a long long, shuffled, and copied back.
inline __attribute__((device)) double
__shfl_xor_sync(unsigned int __mask, double __val, int __offset,
                int __width = warpSize) {
  long long __tmp;
  _Static_assert(sizeof(__tmp) == sizeof(__val));
  memcpy(&__tmp, &__val, sizeof(__val));
  __tmp = ::__shfl_xor_sync(__mask, __tmp, __offset, __width);
  double __ret;
  memcpy(&__ret, &__tmp, sizeof(__ret));
  return __ret;
}


// Warp-level barrier: forwards `mask` (default 0xffffffff) to
// __nvvm_bar_warp_sync.
inline __attribute__((device)) void
__syncwarp(unsigned int mask = 0xffffffff) {
  __nvvm_bar_warp_sync(mask);
}

// Forwards the barrier identifier `id` to __nvvm_barrier_sync.
inline __attribute__((device)) void
__barrier_sync(unsigned int id) {
  __nvvm_barrier_sync(id);
}

// Forwards the barrier identifier `id` and `count` to
// __nvvm_barrier_sync_cnt.
inline __attribute__((device)) void
__barrier_sync_count(unsigned int id, unsigned int count) {
  __nvvm_barrier_sync_cnt(id, count);
}

// Warp vote: returns the result of __nvvm_vote_all_sync(mask, pred),
// converted to int.
inline __attribute__((device)) int
__all_sync(unsigned int mask, int pred) {
  const int vote = __nvvm_vote_all_sync(mask, pred);
  return vote;
}

// Warp vote: returns the result of __nvvm_vote_any_sync(mask, pred),
// converted to int.
inline __attribute__((device)) int
__any_sync(unsigned int mask, int pred) {
  const int vote = __nvvm_vote_any_sync(mask, pred);
  return vote;
}

// Warp vote: returns the result of __nvvm_vote_uni_sync(mask, pred),
// converted to int.
inline __attribute__((device)) int
__uni_sync(unsigned int mask, int pred) {
  const int vote = __nvvm_vote_uni_sync(mask, pred);
  return vote;
}

// Warp vote: returns the value produced by
// __nvvm_vote_ballot_sync(mask, pred).
inline __attribute__((device)) unsigned int
__ballot_sync(unsigned int mask, int pred) {
  const unsigned int ballot = __nvvm_vote_ballot_sync(mask, pred);
  return ballot;
}

// Returns the 32-bit value written by the PTX `activemask.b32` instruction.
inline __attribute__((device)) unsigned int __activemask() {
  unsigned int active;
  // volatile: the instruction must be issued at this point and must not be
  // merged with, or hoisted past, other reads of the mask.
  asm volatile("activemask.b32 %0;" : "=r"(active));
  return active;
}

// Forwards (mask, base, offset) to the __nvvm_fns builtin and returns its
// result.
inline __attribute__((device)) unsigned int
__fns(unsigned mask, unsigned base, int offset) {
  const unsigned int found = __nvvm_fns(mask, base, offset);
  return found;
}
# 264 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_intrinsics.h" 3
// __ldg overload for `const char *`: returns the value loaded by
// __nvvm_ldg_c.
inline __attribute__((device)) char __ldg(const char *ptr) {
  const char loaded = __nvvm_ldg_c(ptr);
  return loaded;
}
// __ldg overload for `const short *`: returns the value loaded by
// __nvvm_ldg_s.
inline __attribute__((device)) short __ldg(const short *ptr) {
  const short loaded = __nvvm_ldg_s(ptr);
  return loaded;
}
// __ldg overload for `const int *`: returns the value loaded by
// __nvvm_ldg_i.
inline __attribute__((device)) int __ldg(const int *ptr) {
  const int loaded = __nvvm_ldg_i(ptr);
  return loaded;
}
// __ldg overload for `const long *`: returns the value loaded by
// __nvvm_ldg_l.
inline __attribute__((device)) long __ldg(const long *ptr) {
  const long loaded = __nvvm_ldg_l(ptr);
  return loaded;
}
// __ldg overload for `const long long *`: returns the value loaded by
// __nvvm_ldg_ll.
inline __attribute__((device)) long long __ldg(const long long *ptr) {
  const long long loaded = __nvvm_ldg_ll(ptr);
  return loaded;
}
// __ldg overload for `const unsigned char *`: returns the value loaded by
// __nvvm_ldg_uc.
inline __attribute__((device)) unsigned char
__ldg(const unsigned char *ptr) {
  const unsigned char loaded = __nvvm_ldg_uc(ptr);
  return loaded;
}
// __ldg overload for `const signed char *`: the pointer is reinterpreted as
// `const unsigned char *`, the byte is loaded with __nvvm_ldg_uc, and the
// result is converted back to signed char.
inline __attribute__((device)) signed char
__ldg(const signed char *ptr) {
  const unsigned char *as_unsigned =
      reinterpret_cast<const unsigned char *>(ptr);
  const signed char loaded = __nvvm_ldg_uc(as_unsigned);
  return loaded;
}
// __ldg overload for `const unsigned short *`: returns the value loaded by
// __nvvm_ldg_us.
inline __attribute__((device)) unsigned short
__ldg(const unsigned short *ptr) {
  const unsigned short loaded = __nvvm_ldg_us(ptr);
  return loaded;
}
// __ldg overload for `const unsigned int *`: returns the value loaded by
// __nvvm_ldg_ui.
inline __attribute__((device)) unsigned int
__ldg(const unsigned int *ptr) {
  const unsigned int loaded = __nvvm_ldg_ui(ptr);
  return loaded;
}
// __ldg overload for `const unsigned long *`: returns the value loaded by
// __nvvm_ldg_ul.
inline __attribute__((device)) unsigned long
__ldg(const unsigned long *ptr) {
  const unsigned long loaded = __nvvm_ldg_ul(ptr);
  return loaded;
}
// __ldg overload for `const unsigned long long *`: returns the value loaded
// by __nvvm_ldg_ull.
inline __attribute__((device)) unsigned long long
__ldg(const unsigned long long *ptr) {
  const unsigned long long loaded = __nvvm_ldg_ull(ptr);
  return loaded;
}
// __ldg overload for `const float *`: returns the value loaded by
// __nvvm_ldg_f.
inline __attribute__((device)) float __ldg(const float *ptr) {
  const float loaded = __nvvm_ldg_f(ptr);
  return loaded;
}
// __ldg overload for `const double *`: returns the value loaded by
// __nvvm_ldg_d.
inline __attribute__((device)) double __ldg(const double *ptr) {
  const double loaded = __nvvm_ldg_d(ptr);
  return loaded;
}

// __ldg overload for char2: loads through __nvvm_ldg_c2 and copies the two
// lanes into a char2.
inline __attribute__((device)) char2 __ldg(const char2 *ptr) {
  // 2-element char ext-vector type used for the builtin call.
  typedef char vec_t __attribute__((ext_vector_type(2)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_c2(src);
  char2 out;
  out.x = lanes[0];
  out.y = lanes[1];
  return out;
}
// __ldg overload for char4: loads through __nvvm_ldg_c4 and copies the four
// lanes into a char4.
inline __attribute__((device)) char4 __ldg(const char4 *ptr) {
  // 4-element char ext-vector type used for the builtin call.
  typedef char vec_t __attribute__((ext_vector_type(4)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_c4(src);
  char4 out;
  out.x = lanes[0];
  out.y = lanes[1];
  out.z = lanes[2];
  out.w = lanes[3];
  return out;
}
// __ldg overload for short2: loads through __nvvm_ldg_s2 and copies the two
// lanes into a short2.
inline __attribute__((device)) short2 __ldg(const short2 *ptr) {
  // 2-element short ext-vector type used for the builtin call.
  typedef short vec_t __attribute__((ext_vector_type(2)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_s2(src);
  short2 out;
  out.x = lanes[0];
  out.y = lanes[1];
  return out;
}
// __ldg overload for short4: loads through __nvvm_ldg_s4 and copies the four
// lanes into a short4.
inline __attribute__((device)) short4 __ldg(const short4 *ptr) {
  // 4-element short ext-vector type used for the builtin call.
  typedef short vec_t __attribute__((ext_vector_type(4)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_s4(src);
  short4 out;
  out.x = lanes[0];
  out.y = lanes[1];
  out.z = lanes[2];
  out.w = lanes[3];
  return out;
}
// __ldg overload for int2: loads through __nvvm_ldg_i2 and copies the two
// lanes into an int2.
inline __attribute__((device)) int2 __ldg(const int2 *ptr) {
  // 2-element int ext-vector type used for the builtin call.
  typedef int vec_t __attribute__((ext_vector_type(2)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_i2(src);
  int2 out;
  out.x = lanes[0];
  out.y = lanes[1];
  return out;
}
// __ldg overload for int4: loads through __nvvm_ldg_i4 and copies the four
// lanes into an int4.
inline __attribute__((device)) int4 __ldg(const int4 *ptr) {
  // 4-element int ext-vector type used for the builtin call.
  typedef int vec_t __attribute__((ext_vector_type(4)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_i4(src);
  int4 out;
  out.x = lanes[0];
  out.y = lanes[1];
  out.z = lanes[2];
  out.w = lanes[3];
  return out;
}
// __ldg overload for longlong2: loads through __nvvm_ldg_ll2 and copies the
// two lanes into a longlong2.
inline __attribute__((device)) longlong2 __ldg(const longlong2 *ptr) {
  // 2-element long long ext-vector type used for the builtin call.
  typedef long long vec_t __attribute__((ext_vector_type(2)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_ll2(src);
  longlong2 out;
  out.x = lanes[0];
  out.y = lanes[1];
  return out;
}

// __ldg overload for uchar2: loads through __nvvm_ldg_uc2 and copies the two
// lanes into a uchar2.
inline __attribute__((device)) uchar2 __ldg(const uchar2 *ptr) {
  // 2-element unsigned char ext-vector type used for the builtin call.
  typedef unsigned char vec_t __attribute__((ext_vector_type(2)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_uc2(src);
  uchar2 out;
  out.x = lanes[0];
  out.y = lanes[1];
  return out;
}
// __ldg overload for uchar4: loads through __nvvm_ldg_uc4 and copies the four
// lanes into a uchar4.
inline __attribute__((device)) uchar4 __ldg(const uchar4 *ptr) {
  // 4-element unsigned char ext-vector type used for the builtin call.
  typedef unsigned char vec_t __attribute__((ext_vector_type(4)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_uc4(src);
  uchar4 out;
  out.x = lanes[0];
  out.y = lanes[1];
  out.z = lanes[2];
  out.w = lanes[3];
  return out;
}
// __ldg overload for ushort2: loads through __nvvm_ldg_us2 and copies the two
// lanes into a ushort2.
inline __attribute__((device)) ushort2 __ldg(const ushort2 *ptr) {
  // 2-element unsigned short ext-vector type used for the builtin call.
  typedef unsigned short vec_t __attribute__((ext_vector_type(2)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_us2(src);
  ushort2 out;
  out.x = lanes[0];
  out.y = lanes[1];
  return out;
}
// __ldg overload for ushort4: loads through __nvvm_ldg_us4 and copies the
// four lanes into a ushort4.
inline __attribute__((device)) ushort4 __ldg(const ushort4 *ptr) {
  // 4-element unsigned short ext-vector type used for the builtin call.
  typedef unsigned short vec_t __attribute__((ext_vector_type(4)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_us4(src);
  ushort4 out;
  out.x = lanes[0];
  out.y = lanes[1];
  out.z = lanes[2];
  out.w = lanes[3];
  return out;
}
// __ldg overload for uint2: loads through __nvvm_ldg_ui2 and copies the two
// lanes into a uint2.
inline __attribute__((device)) uint2 __ldg(const uint2 *ptr) {
  // 2-element unsigned int ext-vector type used for the builtin call.
  typedef unsigned int vec_t __attribute__((ext_vector_type(2)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_ui2(src);
  uint2 out;
  out.x = lanes[0];
  out.y = lanes[1];
  return out;
}
// __ldg overload for uint4: loads through __nvvm_ldg_ui4 and copies the four
// lanes into a uint4.
inline __attribute__((device)) uint4 __ldg(const uint4 *ptr) {
  // 4-element unsigned int ext-vector type used for the builtin call.
  typedef unsigned int vec_t __attribute__((ext_vector_type(4)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_ui4(src);
  uint4 out;
  out.x = lanes[0];
  out.y = lanes[1];
  out.z = lanes[2];
  out.w = lanes[3];
  return out;
}
// __ldg overload for ulonglong2: loads through __nvvm_ldg_ull2 and copies the
// two lanes into a ulonglong2.
inline __attribute__((device)) ulonglong2 __ldg(const ulonglong2 *ptr) {
  // 2-element unsigned long long ext-vector type used for the builtin call.
  typedef unsigned long long vec_t __attribute__((ext_vector_type(2)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_ull2(src);
  ulonglong2 out;
  out.x = lanes[0];
  out.y = lanes[1];
  return out;
}

// __ldg overload for float2: loads through __nvvm_ldg_f2 and copies the two
// lanes into a float2.
inline __attribute__((device)) float2 __ldg(const float2 *ptr) {
  // 2-element float ext-vector type used for the builtin call.
  typedef float vec_t __attribute__((ext_vector_type(2)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_f2(src);
  float2 out;
  out.x = lanes[0];
  out.y = lanes[1];
  return out;
}
// __ldg overload for float4: loads through __nvvm_ldg_f4 and copies the four
// lanes into a float4.
inline __attribute__((device)) float4 __ldg(const float4 *ptr) {
  // 4-element float ext-vector type used for the builtin call.
  typedef float vec_t __attribute__((ext_vector_type(4)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_f4(src);
  float4 out;
  out.x = lanes[0];
  out.y = lanes[1];
  out.z = lanes[2];
  out.w = lanes[3];
  return out;
}
// __ldg overload for double2: loads through __nvvm_ldg_d2 and copies the two
// lanes into a double2.
inline __attribute__((device)) double2 __ldg(const double2 *ptr) {
  // 2-element double ext-vector type used for the builtin call.
  typedef double vec_t __attribute__((ext_vector_type(2)));
  const vec_t *src = reinterpret_cast<const vec_t *>(ptr);
  vec_t lanes = __nvvm_ldg_d2(src);
  double2 out;
  out.x = lanes[0];
  out.y = lanes[1];
  return out;
}


// Emits the PTX instruction `shf.l.wrap.b32` with operands
// (low32, high32, shiftWidth) and returns its 32-bit destination register.
inline __attribute__((device)) unsigned
__funnelshift_l(unsigned low32, unsigned high32, unsigned shiftWidth) {
  unsigned shifted;
  asm("shf.l.wrap.b32 %0, %1, %2, %3;"
      : "=r"(shifted)
      : "r"(low32), "r"(high32), "r"(shiftWidth));
  return shifted;
}
// Emits the PTX instruction `shf.l.clamp.b32` with operands
// (low32, high32, shiftWidth) and returns its 32-bit destination register.
inline __attribute__((device)) unsigned
__funnelshift_lc(unsigned low32, unsigned high32, unsigned shiftWidth) {
  unsigned shifted;
  asm("shf.l.clamp.b32 %0, %1, %2, %3;"
      : "=r"(shifted)
      : "r"(low32), "r"(high32), "r"(shiftWidth));
  return shifted;
}
// Emits the PTX instruction `shf.r.wrap.b32` with operands
// (low32, high32, shiftWidth) and returns its 32-bit destination register.
inline __attribute__((device)) unsigned
__funnelshift_r(unsigned low32, unsigned high32, unsigned shiftWidth) {
  unsigned shifted;
  asm("shf.r.wrap.b32 %0, %1, %2, %3;"
      : "=r"(shifted)
      : "r"(low32), "r"(high32), "r"(shiftWidth));
  return shifted;
}
// Emits the PTX instruction `shf.r.clamp.b32` with operands
// (low32, high32, shiftWidth) and returns its 32-bit destination register.
inline __attribute__((device)) unsigned
__funnelshift_rc(unsigned low32, unsigned high32, unsigned shiftWidth) {
  unsigned shifted;
  asm("shf.r.clamp.b32 %0, %1, %2, %3;"
      : "=r"(shifted)
      : "r"(low32), "r"(high32), "r"(shiftWidth));
  return shifted;
}


// Pointer <-> integer conversion helpers with C linkage. Each
// `generic_to_<space>` function casts a generic pointer to a pointer in a
// numbered address space and returns it as size_t; each `<space>_to_generic`
// function performs the reverse cast. The address-space numbers used are
// 1 (global), 3 (shared), 4 (constant) and 5 (local), paired as the function
// names indicate.
extern "C" {
// Generic pointer -> address_space(1) pointer, returned as an integer.
__attribute__((device)) inline size_t __nv_cvta_generic_to_global_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(1))) *)__ptr;
}
// Generic pointer -> address_space(3) pointer, returned as an integer.
__attribute__((device)) inline size_t __nv_cvta_generic_to_shared_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(3))) *)__ptr;
}
// Generic pointer -> address_space(4) pointer, returned as an integer.
__attribute__((device)) inline size_t __nv_cvta_generic_to_constant_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(4))) *)__ptr;
}
// Generic pointer -> address_space(5) pointer, returned as an integer.
__attribute__((device)) inline size_t __nv_cvta_generic_to_local_impl(const void *__ptr) {
  return (size_t)(void __attribute__((address_space(5))) *)__ptr;
}
// Integer -> address_space(1) pointer -> generic pointer.
__attribute__((device)) inline void *__nv_cvta_global_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(1))) *)__ptr;
}
// Integer -> address_space(3) pointer -> generic pointer.
__attribute__((device)) inline void *__nv_cvta_shared_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(3))) *)__ptr;
}
// Integer -> address_space(4) pointer -> generic pointer.
__attribute__((device)) inline void *__nv_cvta_constant_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(4))) *)__ptr;
}
// Integer -> address_space(5) pointer -> generic pointer.
__attribute__((device)) inline void *__nv_cvta_local_to_generic_impl(size_t __ptr) {
  return (void *)(void __attribute__((address_space(5))) *)__ptr;
}
// Returns the address_space(3) form of `__ptr` as a uint32_t. The size_t
// result of __nv_cvta_generic_to_shared_impl is implicitly converted to
// uint32_t by the return statement.
__attribute__((device)) inline uint32_t __nvvm_get_smem_pointer(void *__ptr) {
  return __nv_cvta_generic_to_shared_impl(__ptr);
}
}
# 474 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_complex_builtins.h" 1 3
# 86 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_complex_builtins.h" 3
extern "C" {


// Double-precision complex multiply: returns (__a + i*__b) * (__c + i*__d).
//
// The product is first computed with the plain four-multiply formula. Only
// when BOTH components of that result are NaN are the operands inspected:
// infinite operands are replaced by +/-1 (other part +/-0), NaN operands by
// +/-0, and the product is recomputed and scaled by infinity.
__attribute__((device)) inline double _Complex __muldc3(double __a, double __b, double __c,
                                    double __d) {
  const double __ac = __a * __c;
  const double __bd = __b * __d;
  const double __ad = __a * __d;
  const double __bc = __b * __c;

  double _Complex __prod;
  __real__(__prod) = __ac - __bd;
  __imag__(__prod) = __ad + __bc;

  // Fast path: at least one component is not NaN, so no repair is needed.
  if (!std::isnan(__real__(__prod)) || !std::isnan(__imag__(__prod)))
    return __prod;

  bool __fixup = false;

  // Left operand is infinite: box it to (+/-1 or +/-0) and zero NaNs on the
  // right.
  if (std::isinf(__a) || std::isinf(__b)) {
    __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
    __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
    if (std::isnan(__c))
      __c = std::copysign(0, __c);
    if (std::isnan(__d))
      __d = std::copysign(0, __d);
    __fixup = true;
  }

  // Right operand is infinite: same treatment with the roles swapped.
  if (std::isinf(__c) || std::isinf(__d)) {
    __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
    __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
    if (std::isnan(__a))
      __a = std::copysign(0, __a);
    if (std::isnan(__b))
      __b = std::copysign(0, __b);
    __fixup = true;
  }

  // Neither operand was infinite but a partial product overflowed: zero every
  // NaN operand.
  if (!__fixup &&
      (std::isinf(__ac) || std::isinf(__bd) || std::isinf(__ad) || std::isinf(__bc))) {
    if (std::isnan(__a))
      __a = std::copysign(0, __a);
    if (std::isnan(__b))
      __b = std::copysign(0, __b);
    if (std::isnan(__c))
      __c = std::copysign(0, __c);
    if (std::isnan(__d))
      __d = std::copysign(0, __d);
    __fixup = true;
  }

  if (__fixup) {
    __real__(__prod) = __builtin_huge_val() * (__a * __c - __b * __d);
    __imag__(__prod) = __builtin_huge_val() * (__a * __d + __b * __c);
  }
  return __prod;
}

// Single-precision complex multiply: returns (__a + i*__b) * (__c + i*__d).
//
// The product is first computed with the plain four-multiply formula. Only
// when BOTH components of that result are NaN are the operands inspected:
// infinite operands are replaced by +/-1 (other part +/-0), NaN operands by
// +/-0, and the product is recomputed and scaled by infinity.
__attribute__((device)) inline float _Complex __mulsc3(float __a, float __b, float __c, float __d) {
  const float __ac = __a * __c;
  const float __bd = __b * __d;
  const float __ad = __a * __d;
  const float __bc = __b * __c;

  float _Complex __prod;
  __real__(__prod) = __ac - __bd;
  __imag__(__prod) = __ad + __bc;

  // Fast path: at least one component is not NaN, so no repair is needed.
  if (!std::isnan(__real__(__prod)) || !std::isnan(__imag__(__prod)))
    return __prod;

  bool __fixup = false;

  // Left operand is infinite: box it to (+/-1 or +/-0) and zero NaNs on the
  // right.
  if (std::isinf(__a) || std::isinf(__b)) {
    __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
    __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
    if (std::isnan(__c))
      __c = std::copysign(0, __c);
    if (std::isnan(__d))
      __d = std::copysign(0, __d);
    __fixup = true;
  }

  // Right operand is infinite: same treatment with the roles swapped.
  if (std::isinf(__c) || std::isinf(__d)) {
    __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
    __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
    if (std::isnan(__a))
      __a = std::copysign(0, __a);
    if (std::isnan(__b))
      __b = std::copysign(0, __b);
    __fixup = true;
  }

  // Neither operand was infinite but a partial product overflowed: zero every
  // NaN operand.
  if (!__fixup &&
      (std::isinf(__ac) || std::isinf(__bd) || std::isinf(__ad) || std::isinf(__bc))) {
    if (std::isnan(__a))
      __a = std::copysign(0, __a);
    if (std::isnan(__b))
      __b = std::copysign(0, __b);
    if (std::isnan(__c))
      __c = std::copysign(0, __c);
    if (std::isnan(__d))
      __d = std::copysign(0, __d);
    __fixup = true;
  }

  if (__fixup) {
    __real__(__prod) = __builtin_huge_valf() * (__a * __c - __b * __d);
    __imag__(__prod) = __builtin_huge_valf() * (__a * __d + __b * __c);
  }
  return __prod;
}

// Double-precision complex divide: returns (__a + i*__b) / (__c + i*__d).
//
// The divisor is first rescaled by the binary exponent of its larger
// component (when that exponent is finite) and the quotient is scaled back
// afterwards. If both components of the quotient come out NaN, three special
// cases are tried in order: zero denominator with a non-NaN numerator,
// infinite numerator with finite divisor, and infinite divisor with finite
// numerator.
__attribute__((device)) inline double _Complex __divdc3(double __a, double __b, double __c,
                                    double __d) {
  // Binary exponent of max(|c|, |d|); stays 0 when logb is not finite.
  const double __logbw = std::logb(max(std::abs(__c), std::abs(__d)));
  int __ilogbw = 0;
  if (std::isfinite(__logbw)) {
    __ilogbw = static_cast<int>(__logbw);
    __c = std::scalbn(__c, -__ilogbw);
    __d = std::scalbn(__d, -__ilogbw);
  }

  const double __denom = __c * __c + __d * __d;

  double _Complex __quot;
  __real__(__quot) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
  __imag__(__quot) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);

  // Fast path: a component that is not NaN means no repair is needed.
  if (!(std::isnan(__real__(__quot)) && std::isnan(__imag__(__quot))))
    return __quot;

  if ((__denom == 0.0) && !(std::isnan(__a) && std::isnan(__b))) {
    // Division by zero with a usable numerator: signed infinity times it.
    const double __signed_inf = std::copysign(__builtin_huge_val(), __c);
    __real__(__quot) = __signed_inf * __a;
    __imag__(__quot) = __signed_inf * __b;
  } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
             std::isfinite(__d)) {
    // Infinite numerator over finite divisor: result is infinite.
    __a = std::copysign(std::isinf(__a) ? 1.0 : 0.0, __a);
    __b = std::copysign(std::isinf(__b) ? 1.0 : 0.0, __b);
    __real__(__quot) = __builtin_huge_val() * (__a * __c + __b * __d);
    __imag__(__quot) = __builtin_huge_val() * (__b * __c - __a * __d);
  } else if (std::isinf(__logbw) && __logbw > 0.0 && std::isfinite(__a) &&
             std::isfinite(__b)) {
    // Finite numerator over infinite divisor: result is a signed zero.
    __c = std::copysign(std::isinf(__c) ? 1.0 : 0.0, __c);
    __d = std::copysign(std::isinf(__d) ? 1.0 : 0.0, __d);
    __real__(__quot) = 0.0 * (__a * __c + __b * __d);
    __imag__(__quot) = 0.0 * (__b * __c - __a * __d);
  }
  return __quot;
}

// Single-precision complex divide: returns (__a + i*__b) / (__c + i*__d).
//
// The divisor is rescaled by the binary exponent of its larger component
// (when finite) before forming the quotient, which is scaled back afterwards.
// A quotient whose components are both NaN is then repaired for the
// zero-denominator, infinite-numerator and infinite-divisor cases.
__attribute__((device)) inline float _Complex __divsc3(float __a, float __b, float __c, float __d) {
  // Binary exponent of max(|c|, |d|); stays 0 when logb is not finite.
  const float __logbw = std::logb(max(std::abs(__c), std::abs(__d)));
  int __ilogbw = 0;
  if (std::isfinite(__logbw)) {
    __ilogbw = static_cast<int>(__logbw);
    __c = std::scalbn(__c, -__ilogbw);
    __d = std::scalbn(__d, -__ilogbw);
  }

  const float __denom = __c * __c + __d * __d;

  float _Complex __quot;
  __real__(__quot) = std::scalbn((__a * __c + __b * __d) / __denom, -__ilogbw);
  __imag__(__quot) = std::scalbn((__b * __c - __a * __d) / __denom, -__ilogbw);

  // Fast path: a component that is not NaN means no repair is needed.
  if (!(std::isnan(__real__(__quot)) && std::isnan(__imag__(__quot))))
    return __quot;

  if ((__denom == 0) && !(std::isnan(__a) && std::isnan(__b))) {
    // Division by zero with a usable numerator: signed infinity times it.
    const float __signed_inf = std::copysign(__builtin_huge_valf(), __c);
    __real__(__quot) = __signed_inf * __a;
    __imag__(__quot) = __signed_inf * __b;
  } else if ((std::isinf(__a) || std::isinf(__b)) && std::isfinite(__c) &&
             std::isfinite(__d)) {
    // Infinite numerator over finite divisor: result is infinite.
    __a = std::copysign(std::isinf(__a) ? 1 : 0, __a);
    __b = std::copysign(std::isinf(__b) ? 1 : 0, __b);
    __real__(__quot) = __builtin_huge_valf() * (__a * __c + __b * __d);
    __imag__(__quot) = __builtin_huge_valf() * (__b * __c - __a * __d);
  } else if (std::isinf(__logbw) && __logbw > 0 && std::isfinite(__a) &&
             std::isfinite(__b)) {
    // Finite numerator over infinite divisor: result is a signed zero.
    __c = std::copysign(std::isinf(__c) ? 1 : 0, __c);
    __d = std::copysign(std::isinf(__d) ? 1 : 0, __d);
    __real__(__quot) = 0 * (__a * __c + __b * __d);
    __imag__(__quot) = 0 * (__b * __c - __a * __d);
  }
  return __quot;
}


}
# 475 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 486 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
# 1 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 1 3
# 107 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 3
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/stdlib.h" 1 3
# 108 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 2 3
# 1 "/usr/include/memory.h" 1 3 4
# 109 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 2 3


# 1 "/usr/local/cuda-11.7/include/curand.h" 1 3
# 71 "/usr/local/cuda-11.7/include/curand.h" 3
extern "C" {
# 90 "/usr/local/cuda-11.7/include/curand.h" 3
// Status codes of the cuRAND host API. Every cuRAND function declared in
// this header returns one of these values (through the curandStatus_t alias).
// The numeric values fall into four groups: 0, 100-106, 201-204 and 999.
enum curandStatus {
    CURAND_STATUS_SUCCESS = 0,                      // the only zero value
    // 100-series.
    CURAND_STATUS_VERSION_MISMATCH = 100,
    CURAND_STATUS_NOT_INITIALIZED = 101,
    CURAND_STATUS_ALLOCATION_FAILED = 102,
    CURAND_STATUS_TYPE_ERROR = 103,
    CURAND_STATUS_OUT_OF_RANGE = 104,
    CURAND_STATUS_LENGTH_NOT_MULTIPLE = 105,
    CURAND_STATUS_DOUBLE_PRECISION_REQUIRED = 106,
    // 200-series.
    CURAND_STATUS_LAUNCH_FAILURE = 201,
    CURAND_STATUS_PREEXISTING_FAILURE = 202,
    CURAND_STATUS_INITIALIZATION_FAILED = 203,
    CURAND_STATUS_ARCH_MISMATCH = 204,
    // Catch-all value, far away from the other ranges.
    CURAND_STATUS_INTERNAL_ERROR = 999
};


typedef enum curandStatus curandStatus_t;


// Generator kinds accepted as the rng_type argument of
// curandCreateGenerator() and curandCreateGeneratorHost().
// Enumerator names split into PSEUDO (100-161) and QUASI (200-204) families.
enum curandRngType {
    CURAND_RNG_TEST = 0,
    // PSEUDO family.
    CURAND_RNG_PSEUDO_DEFAULT = 100,
    CURAND_RNG_PSEUDO_XORWOW = 101,
    CURAND_RNG_PSEUDO_MRG32K3A = 121,
    CURAND_RNG_PSEUDO_MTGP32 = 141,
    CURAND_RNG_PSEUDO_MT19937 = 142,
    CURAND_RNG_PSEUDO_PHILOX4_32_10 = 161,
    // QUASI family.
    CURAND_RNG_QUASI_DEFAULT = 200,
    CURAND_RNG_QUASI_SOBOL32 = 201,
    CURAND_RNG_QUASI_SCRAMBLED_SOBOL32 = 202,
    CURAND_RNG_QUASI_SOBOL64 = 203,
    CURAND_RNG_QUASI_SCRAMBLED_SOBOL64 = 204
};


typedef enum curandRngType curandRngType_t;


// Ordering selectors accepted as the order argument of
// curandSetGeneratorOrdering(). Names split into PSEUDO (100-104) and
// QUASI (201) values.
enum curandOrdering {
    CURAND_ORDERING_PSEUDO_BEST = 100,
    CURAND_ORDERING_PSEUDO_DEFAULT = 101,
    CURAND_ORDERING_PSEUDO_SEEDED = 102,
    CURAND_ORDERING_PSEUDO_LEGACY = 103,
    CURAND_ORDERING_PSEUDO_DYNAMIC = 104,
    CURAND_ORDERING_QUASI_DEFAULT = 201
};


typedef enum curandOrdering curandOrdering_t;


// Selects which direction-vector set curandGetDirectionVectors32() /
// curandGetDirectionVectors64() should return (their `set` argument).
// Names pair a 32- or 64-bit width with a plain or SCRAMBLED variant.
enum curandDirectionVectorSet {
    CURAND_DIRECTION_VECTORS_32_JOEKUO6 = 101,
    CURAND_SCRAMBLED_DIRECTION_VECTORS_32_JOEKUO6 = 102,
    CURAND_DIRECTION_VECTORS_64_JOEKUO6 = 103,
    CURAND_SCRAMBLED_DIRECTION_VECTORS_64_JOEKUO6 = 104
};


typedef enum curandDirectionVectorSet curandDirectionVectorSet_t;


typedef unsigned int curandDirectionVectors32_t[32];


typedef unsigned long long curandDirectionVectors64_t[64];


struct curandGenerator_st;


typedef struct curandGenerator_st *curandGenerator_t;


typedef double curandDistribution_st;
typedef curandDistribution_st *curandDistribution_t;
typedef struct curandDistributionShift_st *curandDistributionShift_t;


typedef struct curandDistributionM2Shift_st *curandDistributionM2Shift_t;
typedef struct curandHistogramM2_st *curandHistogramM2_t;
typedef unsigned int curandHistogramM2K_st;
typedef curandHistogramM2K_st *curandHistogramM2K_t;
typedef curandDistribution_st curandHistogramM2V_st;
typedef curandHistogramM2V_st *curandHistogramM2V_t;

typedef struct curandDiscreteDistribution_st *curandDiscreteDistribution_t;


// Method selectors accepted as the `method` argument of
// curandGeneratePoissonMethod() and curandGenerateBinomialMethod().
// Values are consecutive from 0 to 13.
// NOTE(review): the meaning of the individual methods is not visible in
// this translation unit; consult the cuRAND documentation before relying
// on any of them.
enum curandMethod {
    CURAND_CHOOSE_BEST = 0,
    CURAND_ITR = 1,
    CURAND_KNUTH = 2,
    CURAND_HITR = 3,
    CURAND_M1 = 4,
    CURAND_M2 = 5,
    CURAND_BINARY_SEARCH = 6,
    CURAND_DISCRETE_GAUSS = 7,
    CURAND_REJECTION = 8,
    CURAND_DEVICE_API = 9,
    CURAND_FAST_REJECTION = 10,
    CURAND_3RD = 11,
    CURAND_DEFINITION = 12,
    CURAND_POISSON = 13
};

typedef enum curandMethod curandMethod_t;
# 334 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandCreateGenerator(curandGenerator_t *generator, curandRngType_t rng_type);
# 414 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandCreateGeneratorHost(curandGenerator_t *generator, curandRngType_t rng_type);
# 428 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandDestroyGenerator(curandGenerator_t generator);
# 444 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGetVersion(int *version);
# 460 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGetProperty(libraryPropertyType type, int *value);
# 477 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandSetStream(curandGenerator_t generator, cudaStream_t stream);
# 496 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandSetPseudoRandomGeneratorSeed(curandGenerator_t generator, unsigned long long seed);
# 514 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandSetGeneratorOffset(curandGenerator_t generator, unsigned long long offset);
# 539 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandSetGeneratorOrdering(curandGenerator_t generator, curandOrdering_t order);
# 559 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandSetQuasiRandomGeneratorDimensions(curandGenerator_t generator, unsigned int num_dimensions);
# 589 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGenerate(curandGenerator_t generator, unsigned int *outputPtr, size_t num);
# 617 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGenerateLongLong(curandGenerator_t generator, unsigned long long *outputPtr, size_t num);
# 646 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGenerateUniform(curandGenerator_t generator, float *outputPtr, size_t num);
# 676 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGenerateUniformDouble(curandGenerator_t generator, double *outputPtr, size_t num);
# 722 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGenerateNormal(curandGenerator_t generator, float *outputPtr,
                     size_t n, float mean, float stddev);
# 770 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGenerateNormalDouble(curandGenerator_t generator, double *outputPtr,
                     size_t n, double mean, double stddev);
# 818 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGenerateLogNormal(curandGenerator_t generator, float *outputPtr,
                     size_t n, float mean, float stddev);
# 867 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGenerateLogNormalDouble(curandGenerator_t generator, double *outputPtr,
                     size_t n, double mean, double stddev);
# 893 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandCreatePoissonDistribution(double lambda, curandDiscreteDistribution_t *discrete_distribution);
# 909 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandDestroyDistribution(curandDiscreteDistribution_t discrete_distribution);
# 942 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGeneratePoisson(curandGenerator_t generator, unsigned int *outputPtr,
                     size_t n, double lambda);

curandStatus_t
curandGeneratePoissonMethod(curandGenerator_t generator, unsigned int *outputPtr,
                     size_t n, double lambda, curandMethod_t method);


curandStatus_t
curandGenerateBinomial(curandGenerator_t generator, unsigned int *outputPtr,
                       size_t num, unsigned int n, double p);

curandStatus_t
curandGenerateBinomialMethod(curandGenerator_t generator,
                             unsigned int *outputPtr,
                             size_t num, unsigned int n, double p,
                             curandMethod_t method);
# 981 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGenerateSeeds(curandGenerator_t generator);
# 1005 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGetDirectionVectors32(curandDirectionVectors32_t *vectors[], curandDirectionVectorSet_t set);
# 1023 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGetScrambleConstants32(unsigned int * * constants);
# 1047 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGetDirectionVectors64(curandDirectionVectors64_t *vectors[], curandDirectionVectorSet_t set);
# 1065 "/usr/local/cuda-11.7/include/curand.h" 3
curandStatus_t
curandGetScrambleConstants64(unsigned long long * * constants);


}
# 112 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 2 3
# 1 "/usr/local/cuda-11.7/include/curand_mtgp32.h" 1 3
# 138 "/usr/local/cuda-11.7/include/curand_mtgp32.h" 3
struct mtgp32_params_fast;

// One MTGP32 parameter record (aliased below as mtgp32_params_fast_t).
// NOTE(review): nothing in this translation unit reads these fields, so the
// per-field notes below are inferred from names only — confirm against the
// MTGP reference implementation.
struct mtgp32_params_fast {
    int mexp;                      // presumably the Mersenne exponent
    int pos;                       // presumably the pick-up position (cf. pos_tbl in mtgp32_kernel_params)
    int sh1;                       // presumably shift 1 (cf. sh1_tbl)
    int sh2;                       // presumably shift 2 (cf. sh2_tbl)
    unsigned int tbl[16];          // 16-entry table
    unsigned int tmp_tbl[16];      // 16-entry table
    unsigned int flt_tmp_tbl[16];  // 16-entry table

    unsigned int mask;
    unsigned char poly_sha1[21];   // 21 bytes
};


typedef struct mtgp32_params_fast mtgp32_params_fast_t;


struct mtgp32_kernel_params;
// Lookup tables read by the MTGP32 device functions (para_rec, temper,
// temper_single, curand and its variants). The first index of every table
// except `mask` is a parameter-set id: the `bid` argument of the helpers,
// which callers fill from curandStateMtgp32::pIdx. Each table has 200 rows.
struct mtgp32_kernel_params {
    unsigned int pos_tbl[200];                // read as `pos` by the generators: distance to the third state word fed to para_rec
    unsigned int param_tbl[200][16];          // para_rec: entry selected by the low 4 bits of the mixed word
    unsigned int temper_tbl[200][16];         // temper: entry selected by the low 4 bits of the folded T
    unsigned int single_temper_tbl[200][16];  // temper_single: same selection, float-oriented output
    unsigned int sh1_tbl[200];                // para_rec: left-shift amount
    unsigned int sh2_tbl[200];                // para_rec: right-shift amount applied to Y
    unsigned int mask[1];                     // para_rec: mask ANDed with X1 (single shared entry)
};


typedef struct mtgp32_kernel_params mtgp32_kernel_params_t;
# 191 "/usr/local/cuda-11.7/include/curand_mtgp32.h" 3
struct curandStateMtgp32;

// Generator state consumed by the MTGP32 device functions in this header.
// A single state object is used by all calling threads of a block: each
// thread touches its own slots of `s`, and one thread advances `offset`
// between two __syncthreads() barriers.
struct curandStateMtgp32 {
    unsigned int s[1024];        // state words; most accesses index with `& 1023`, i.e. wrap around
    int offset;                  // current start position inside `s`, kept in [0, 1023] by `& 1023`
    int pIdx;                    // parameter-set id used to index the tables in *k
    mtgp32_kernel_params_t * k;  // lookup tables shared by the helpers
};


typedef struct curandStateMtgp32 curandStateMtgp32_t;
# 113 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 2 3
# 136 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 3
// MTGP32 recursion step.
//   k    lookup tables (mask, sh1_tbl, sh2_tbl, param_tbl)
//   X1   first state word; masked with k->mask[0] before use
//   X2   second state word
//   Y    third state word
//   bid  parameter-set id selecting the table rows
// Returns the new state word.
static __inline__ __attribute__((always_inline)) __attribute__((device)) unsigned int para_rec(mtgp32_kernel_params_t * k,unsigned int X1, unsigned int X2, unsigned int Y, int bid) {
    // Combine the two leading words, then smear the result upwards.
    unsigned int mixed = (X1 & k->mask[0]) ^ X2;
    mixed = mixed ^ (mixed << k->sh1_tbl[bid]);

    // Fold in the shifted third word.
    const unsigned int folded = mixed ^ (Y >> k->sh2_tbl[bid]);

    // The low nibble picks the parameter-table entry XORed into the result.
    return folded ^ k->param_tbl[bid][folded & 0x0f];
}
# 154 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 3
// MTGP32 tempering for integer output.
//   k    lookup tables (temper_tbl)
//   V    value to temper
//   T    state word that selects the tempering entry
//   bid  parameter-set id selecting the table row
// Returns V XORed with the selected tempering entry.
static __inline__ __attribute__((always_inline)) __attribute__((device)) unsigned int temper(mtgp32_kernel_params_t * k,unsigned int V, unsigned int T, int bid) {
    // XOR-fold T by 16 and then by 8 bits; only the low nibble is used.
    const unsigned int fold16 = T ^ (T >> 16);
    const unsigned int fold8 = fold16 ^ (fold16 >> 8);

    return V ^ k->temper_tbl[bid][fold8 & 0x0f];
}
# 173 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 3
// MTGP32 tempering for single-precision output.
//   k    lookup tables (single_temper_tbl)
//   V    value to temper; only its upper 23 bits survive the >> 9
//   T    state word that selects the tempering entry
//   bid  parameter-set id selecting the table row
// Returns (V >> 9) XORed with the selected entry; callers reinterpret the
// bits as a float.
static __inline__ __attribute__((always_inline)) __attribute__((device)) unsigned int temper_single(mtgp32_kernel_params_t * k,unsigned int V, unsigned int T, int bid) {
    // XOR-fold T by 16 and then by 8 bits; only the low nibble is used.
    const unsigned int fold16 = T ^ (T >> 16);
    const unsigned int fold8 = fold16 ^ (fold16 >> 8);

    return (V >> 9) ^ k->single_temper_tbl[bid][fold8 & 0x0f];
}
# 195 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 3
// Returns the next 32-bit MTGP32 output for the calling thread.
//
// All threads of the block operate on the same *state: each thread computes
// one new state word at its own position, and thread 0 then advances
// state->offset by the block's thread count. The function contains two
// __syncthreads() barriers, so every thread of the block must call it.
//
// The per-thread position `t` is the row-major linear index of the thread
// (x fastest, then y, then z). The z term must be scaled by
// blockDim.x * blockDim.y; scaling it by blockDim.z * blockDim.y makes
// distinct threads of a 3-D block collide whenever blockDim.x != blockDim.z.
// For 1-D and 2-D blocks (threadIdx.z == 0) the two forms are identical.
//
// NOTE(review): positions wrap modulo 1024 and the write slot is only 351
// words ahead of the read slot, so large blocks can overlap — confirm the
// maximum supported block size against the cuRAND documentation.
static __inline__ __attribute__((always_inline)) __attribute__((device)) unsigned int curand(curandStateMtgp32_t *state)
{
    unsigned int t;
    unsigned int d;
    int pos = state->k->pos_tbl[state->pIdx];
    unsigned int r;
    unsigned int o;

    // Total number of threads in the block.
    d = blockDim.z * blockDim.y * blockDim.x;

    // Row-major linear thread index within the block.
    t = (blockDim.x * blockDim.y * threadIdx.z) + (blockDim.x * threadIdx.y) + threadIdx.x;
    r = para_rec(state->k, state->s[(t + state->offset) & 1023],
             state->s[(t + state->offset + 1) & 1023],
             state->s[(t + state->offset + pos) & 1023],
             state->pIdx);

    // Publish the new state word 351 slots ahead of this thread's position.
    state->s[(t + state->offset + 351) & 1023] = r;
    o = temper(state->k, r,
           state->s[(t + state->offset + pos -1) & 1023],
           state->pIdx);

    // Everyone must finish reading state->offset before it is advanced.
    __syncthreads();

    if (t == 0)
    {
        state->offset = (state->offset + d) & 1023;
    }

    // Make the new offset visible before any thread returns.
    __syncthreads();

    return o;

}
# 246 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 3
// Returns the next 32-bit MTGP32 output for an explicitly chosen position.
//   state  generator state shared by the calling threads
//   index  position of this caller within the state (replaces the
//          thread-derived index used by curand())
//   n      amount by which the caller with index == 0 advances state->offset
// Contains two __syncthreads() barriers, so every thread of the block must
// reach this call.
static __inline__ __attribute__((always_inline)) __attribute__((device)) unsigned int curand_mtgp32_specific(curandStateMtgp32_t *state, unsigned char index, unsigned char n)
{
    const int pos = state->k->pos_tbl[state->pIdx];

    // Unsigned base position; every slot below is taken modulo 1024.
    const unsigned int slot = index;
    const unsigned int base = slot + state->offset;

    const unsigned int r = para_rec(state->k,
                                    state->s[base & 1023],
                                    state->s[(base + 1) & 1023],
                                    state->s[(base + pos) & 1023],
                                    state->pIdx);

    // Publish the new state word 351 slots ahead.
    state->s[(base + 351) & 1023] = r;

    const unsigned int o = temper(state->k, r,
                                  state->s[(base + pos - 1) & 1023],
                                  state->pIdx);

    // All reads of state->offset must complete before it is advanced.
    __syncthreads();

    if (index == 0)
    {
        state->offset = (state->offset + n) & 1023;
    }

    __syncthreads();

    return o;
}
# 290 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 3
// Returns the next MTGP32 output as a float: the tempered bits from
// temper_single() are copied bit-for-bit into the result.
// Contains two __syncthreads() barriers, so every thread of the block must
// reach this call. The thread with threadIdx.x == 0 advances state->offset
// by the block's thread count.
//
// NOTE(review): unlike curand(), the per-thread position here is
// blockDim.z * blockDim.y + threadIdx.x and the new word is stored at s[t]
// without the "+ offset + 351, & 1023" wrap — confirm this is intended
// before relying on this function.
static __inline__ __attribute__((always_inline)) __attribute__((device)) float curand_mtgp32_single(curandStateMtgp32_t *state)
{
    const int pos = state->k->pos_tbl[state->pIdx];

    const unsigned int zy = blockDim.z * blockDim.y;
    const unsigned int d = zy * blockDim.x;       // threads per block
    const unsigned int t = zy + threadIdx.x;      // this thread's position
    const unsigned int base = t + state->offset;  // wrapped with & 1023 below

    const unsigned int r = para_rec(state->k,
                                    state->s[base & 1023],
                                    state->s[(base + 1) & 1023],
                                    state->s[(base + pos) & 1023],
                                    state->pIdx);

    state->s[t] = r;

    const unsigned int o_u = temper_single(state->k, r,
                                           state->s[(base + pos - 1) & 1023],
                                           state->pIdx);

    // All reads of state->offset must complete before it is advanced.
    __syncthreads();

    if (threadIdx.x == 0)
    {
        state->offset = (state->offset + d) & 1023;
    }

    __syncthreads();

    // Reinterpret the 32 result bits as a float.
    float o_f;
    memcpy(&o_f, &o_u, sizeof(o_u));
    return o_f;
}
# 351 "/usr/local/cuda-11.7/include/curand_mtgp32_kernel.h" 3
// Float-returning MTGP32 step for an explicitly chosen position.
//   state  generator state shared by the calling threads
//   index  position of this caller within the state
//   n      amount by which the thread with threadIdx.x == 0 advances
//          state->offset
// Contains two __syncthreads() barriers, so every thread of the block must
// reach this call.
//
// NOTE(review): the new word is stored at s[index] (no offset, no wrap) and
// the offset update is gated on threadIdx.x rather than on index — confirm
// both are intended.
static __inline__ __attribute__((always_inline)) __attribute__((device)) float curand_mtgp32_single_specific(curandStateMtgp32_t *state, unsigned char index, unsigned char n)
{
    const int pos = state->k->pos_tbl[state->pIdx];

    const unsigned int t = index;
    const unsigned int base = t + state->offset;  // wrapped with & 1023 below

    const unsigned int r = para_rec(state->k,
                                    state->s[base & 1023],
                                    state->s[(base + 1) & 1023],
                                    state->s[(base + pos) & 1023],
                                    state->pIdx);

    state->s[t] = r;

    const unsigned int o_u = temper_single(state->k, r,
                                           state->s[(base + pos - 1) & 1023],
                                           state->pIdx);

    // All reads of state->offset must complete before it is advanced.
    __syncthreads();

    if (threadIdx.x == 0)
    {
        state->offset = (state->offset + n) & 1023;
    }

    __syncthreads();

    // Reinterpret the 32 result bits as a float.
    float o_f;
    memcpy(&o_f, &o_u, sizeof(o_u));
    return o_f;
}
# 487 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 2 3
# 497 "/usr/lib/llvm-14/lib/clang/14.0.0/include/__clang_cuda_runtime_wrapper.h" 3
extern "C" unsigned __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim,
                                                size_t sharedMem = 0,
                                                void *stream = 0);
# 2 "<built-in>" 2
# 1 "vecadd.cu" 2

# 1 "/usr/include/stdio.h" 1 3 4
# 27 "/usr/include/stdio.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/libc-header-start.h" 1 3 4
# 28 "/usr/include/stdio.h" 2 3 4

extern "C" {


# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stddef.h" 1 3 4
# 34 "/usr/include/stdio.h" 2 3 4


# 1 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stdarg.h" 1 3 4
# 14 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stdarg.h" 3 4
typedef __builtin_va_list va_list;
# 32 "/usr/lib/llvm-14/lib/clang/14.0.0/include/stdarg.h" 3 4
typedef __builtin_va_list __gnuc_va_list;
# 37 "/usr/include/stdio.h" 2 3 4


# 1 "/usr/include/x86_64-linux-gnu/bits/types/__fpos_t.h" 1 3 4


# 1 "/usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h" 1 3 4
# 13 "/usr/include/x86_64-linux-gnu/bits/types/__mbstate_t.h" 3 4
// Conversion-state record embedded in __fpos_t and __fpos64_t below.
// NOTE(review): field roles are inferred from names only — confirm against
// the glibc sources.
typedef struct
{
  int __count;          // presumably a byte/shift counter for a partial character
  union
  {
    unsigned int __wch; // value viewed as one 32-bit unit
    char __wchb[4];     // the same 4 bytes viewed individually
  } __value;
} __mbstate_t;
# 6 "/usr/include/x86_64-linux-gnu/bits/types/__fpos_t.h" 2 3 4


// File-position record using the __off_t offset type; aliased as fpos_t
// further down and used by fgetpos()/fsetpos().
typedef struct _G_fpos_t
{
  __off_t __pos;        // offset
  __mbstate_t __state;  // conversion state stored alongside the offset
} __fpos_t;
# 40 "/usr/include/stdio.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/types/__fpos64_t.h" 1 3 4
# 10 "/usr/include/x86_64-linux-gnu/bits/types/__fpos64_t.h" 3 4
// File-position record using the __off64_t offset type; aliased as fpos64_t
// further down and used by fgetpos64()/fsetpos64().
typedef struct _G_fpos64_t
{
  __off64_t __pos;      // offset
  __mbstate_t __state;  // conversion state stored alongside the offset
} __fpos64_t;
# 41 "/usr/include/stdio.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/types/__FILE.h" 1 3 4


struct _IO_FILE;
typedef struct _IO_FILE __FILE;
# 42 "/usr/include/stdio.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/types/FILE.h" 1 3 4


struct _IO_FILE;


typedef struct _IO_FILE FILE;
# 43 "/usr/include/stdio.h" 2 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/types/struct_FILE.h" 1 3 4
# 35 "/usr/include/x86_64-linux-gnu/bits/types/struct_FILE.h" 3 4
struct _IO_FILE;
struct _IO_marker;
struct _IO_codecvt;
struct _IO_wide_data;


typedef void _IO_lock_t;


// Layout of the stdio stream object; FILE and __FILE are typedefs of this
// struct. User code treats it as opaque and only passes FILE* around.
// NOTE(review): the per-group notes below are inferred from field names —
// confirm against glibc's struct_FILE.h before relying on them.
struct _IO_FILE
{
  int _flags;


  // Presumably the read area, write area and backing buffer boundaries.
  char *_IO_read_ptr;
  char *_IO_read_end;
  char *_IO_read_base;
  char *_IO_write_base;
  char *_IO_write_ptr;
  char *_IO_write_end;
  char *_IO_buf_base;
  char *_IO_buf_end;


  // Presumably save/backup area pointers.
  char *_IO_save_base;
  char *_IO_backup_base;
  char *_IO_save_end;

  struct _IO_marker *_markers;

  // Link to another stream object (same struct type).
  struct _IO_FILE *_chain;

  int _fileno;
  int _flags2;
  __off_t _old_offset;


  unsigned short _cur_column;
  signed char _vtable_offset;
  char _shortbuf[1];

  // _IO_lock_t is a typedef of void here, so this is an opaque pointer.
  _IO_lock_t *_lock;


  __off64_t _offset;

  struct _IO_codecvt *_codecvt;
  struct _IO_wide_data *_wide_data;
  struct _IO_FILE *_freeres_list;
  void *_freeres_buf;
  size_t __pad5;
  int _mode;

  // Tail padding: 15 ints' worth of bytes minus 4 pointers and one size_t.
  char _unused2[15 * sizeof (int) - 4 * sizeof (void *) - sizeof (size_t)];
};
# 44 "/usr/include/stdio.h" 2 3 4


# 1 "/usr/include/x86_64-linux-gnu/bits/types/cookie_io_functions_t.h" 1 3 4
# 27 "/usr/include/x86_64-linux-gnu/bits/types/cookie_io_functions_t.h" 3 4
typedef __ssize_t cookie_read_function_t (void *__cookie, char *__buf,
                                          size_t __nbytes);


typedef __ssize_t cookie_write_function_t (void *__cookie, const char *__buf,
                                           size_t __nbytes);


typedef int cookie_seek_function_t (void *__cookie, __off64_t *__pos, int __w);


typedef int cookie_close_function_t (void *__cookie);


// Bundle of the four callback pointers passed by value to fopencookie()
// (its __io_funcs parameter). Each member uses the matching
// cookie_*_function_t typedef declared just above, all of which take the
// caller's cookie pointer as their first argument.
typedef struct _IO_cookie_io_functions_t
{
  cookie_read_function_t *read;    // (cookie, buf, nbytes) -> __ssize_t
  cookie_write_function_t *write;  // (cookie, const buf, nbytes) -> __ssize_t
  cookie_seek_function_t *seek;    // (cookie, __off64_t *pos, int w) -> int
  cookie_close_function_t *close;  // (cookie) -> int
} cookie_io_functions_t;
# 47 "/usr/include/stdio.h" 2 3 4


typedef __gnuc_va_list va_list;
# 84 "/usr/include/stdio.h" 3 4
typedef __fpos_t fpos_t;


typedef __fpos64_t fpos64_t;
# 133 "/usr/include/stdio.h" 3 4
# 1 "/usr/include/x86_64-linux-gnu/bits/stdio_lim.h" 1 3 4
# 134 "/usr/include/stdio.h" 2 3 4
# 143 "/usr/include/stdio.h" 3 4
extern FILE *stdin;
extern FILE *stdout;
extern FILE *stderr;


extern int remove (const char *__filename) noexcept (true);

extern int rename (const char *__old, const char *__new) noexcept (true);


extern int renameat (int __oldfd, const char *__old, int __newfd,
       const char *__new) noexcept (true);
# 170 "/usr/include/stdio.h" 3 4
extern int renameat2 (int __oldfd, const char *__old, int __newfd,
        const char *__new, unsigned int __flags) noexcept (true);


extern int fclose (FILE *__stream);
# 188 "/usr/include/stdio.h" 3 4
extern FILE *tmpfile (void)
  __attribute__ ((__malloc__)) ;
# 200 "/usr/include/stdio.h" 3 4
extern FILE *tmpfile64 (void)
   __attribute__ ((__malloc__)) ;


extern char *tmpnam (char[20]) noexcept (true) ;


extern char *tmpnam_r (char __s[20]) noexcept (true) ;
# 222 "/usr/include/stdio.h" 3 4
extern char *tempnam (const char *__dir, const char *__pfx)
   noexcept (true) __attribute__ ((__malloc__)) ;


extern int fflush (FILE *__stream);
# 239 "/usr/include/stdio.h" 3 4
extern int fflush_unlocked (FILE *__stream);
# 249 "/usr/include/stdio.h" 3 4
extern int fcloseall (void);
# 258 "/usr/include/stdio.h" 3 4
extern FILE *fopen (const char *__restrict __filename,
      const char *__restrict __modes)
  __attribute__ ((__malloc__)) ;


extern FILE *freopen (const char *__restrict __filename,
        const char *__restrict __modes,
        FILE *__restrict __stream) ;
# 283 "/usr/include/stdio.h" 3 4
extern FILE *fopen64 (const char *__restrict __filename,
        const char *__restrict __modes)
  __attribute__ ((__malloc__)) ;
extern FILE *freopen64 (const char *__restrict __filename,
   const char *__restrict __modes,
   FILE *__restrict __stream) ;


extern FILE *fdopen (int __fd, const char *__modes) noexcept (true)
  __attribute__ ((__malloc__)) ;


extern FILE *fopencookie (void *__restrict __magic_cookie,
     const char *__restrict __modes,
     cookie_io_functions_t __io_funcs) noexcept (true)
  __attribute__ ((__malloc__)) ;


extern FILE *fmemopen (void *__s, size_t __len, const char *__modes)
  noexcept (true) __attribute__ ((__malloc__)) ;


extern FILE *open_memstream (char **__bufloc, size_t *__sizeloc) noexcept (true)
  __attribute__ ((__malloc__)) ;
# 328 "/usr/include/stdio.h" 3 4
extern void setbuf (FILE *__restrict __stream, char *__restrict __buf) noexcept (true);


extern int setvbuf (FILE *__restrict __stream, char *__restrict __buf,
      int __modes, size_t __n) noexcept (true);


extern void setbuffer (FILE *__restrict __stream, char *__restrict __buf,
         size_t __size) noexcept (true);


extern void setlinebuf (FILE *__stream) noexcept (true);


extern int fprintf (FILE *__restrict __stream,
      const char *__restrict __format, ...);


extern int printf (const char *__restrict __format, ...);

extern int sprintf (char *__restrict __s,
      const char *__restrict __format, ...) noexcept (true);


extern int vfprintf (FILE *__restrict __s, const char *__restrict __format,
       __gnuc_va_list __arg);


extern int vprintf (const char *__restrict __format, __gnuc_va_list __arg);

extern int vsprintf (char *__restrict __s, const char *__restrict __format,
       __gnuc_va_list __arg) noexcept (true);


extern int snprintf (char *__restrict __s, size_t __maxlen,
       const char *__restrict __format, ...)
     noexcept (true) __attribute__ ((__format__ (__printf__, 3, 4)));

extern int vsnprintf (char *__restrict __s, size_t __maxlen,
        const char *__restrict __format, __gnuc_va_list __arg)
     noexcept (true) __attribute__ ((__format__ (__printf__, 3, 0)));


extern int vasprintf (char **__restrict __ptr, const char *__restrict __f,
        __gnuc_va_list __arg)
     noexcept (true) __attribute__ ((__format__ (__printf__, 2, 0))) ;
extern int __asprintf (char **__restrict __ptr,
         const char *__restrict __fmt, ...)
     noexcept (true) __attribute__ ((__format__ (__printf__, 2, 3))) ;
extern int asprintf (char **__restrict __ptr,
       const char *__restrict __fmt, ...)
     noexcept (true) __attribute__ ((__format__ (__printf__, 2, 3))) ;


extern int vdprintf (int __fd, const char *__restrict __fmt,
       __gnuc_va_list __arg)
     __attribute__ ((__format__ (__printf__, 2, 0)));
extern int dprintf (int __fd, const char *__restrict __fmt, ...)
     __attribute__ ((__format__ (__printf__, 2, 3)));


extern int fscanf (FILE *__restrict __stream,
     const char *__restrict __format, ...) ;


extern int scanf (const char *__restrict __format, ...) ;

extern int sscanf (const char *__restrict __s,
     const char *__restrict __format, ...) noexcept (true);
# 434 "/usr/include/stdio.h" 3 4
extern int fscanf (FILE *__restrict __stream, const char *__restrict __format, ...) __asm__ ("" "__isoc99_fscanf") ;


extern int scanf (const char *__restrict __format, ...) __asm__ ("" "__isoc99_scanf") ;

extern int sscanf (const char *__restrict __s, const char *__restrict __format, ...) noexcept (true) __asm__ ("" "__isoc99_sscanf");
# 459 "/usr/include/stdio.h" 3 4
extern int vfscanf (FILE *__restrict __s, const char *__restrict __format,
      __gnuc_va_list __arg)
     __attribute__ ((__format__ (__scanf__, 2, 0))) ;


extern int vscanf (const char *__restrict __format, __gnuc_va_list __arg)
     __attribute__ ((__format__ (__scanf__, 1, 0))) ;


extern int vsscanf (const char *__restrict __s,
      const char *__restrict __format, __gnuc_va_list __arg)
     noexcept (true) __attribute__ ((__format__ (__scanf__, 2, 0)));


extern int vfscanf (FILE *__restrict __s, const char *__restrict __format, __gnuc_va_list __arg) __asm__ ("" "__isoc99_vfscanf")


     __attribute__ ((__format__ (__scanf__, 2, 0))) ;
extern int vscanf (const char *__restrict __format, __gnuc_va_list __arg) __asm__ ("" "__isoc99_vscanf")

     __attribute__ ((__format__ (__scanf__, 1, 0))) ;
extern int vsscanf (const char *__restrict __s, const char *__restrict __format, __gnuc_va_list __arg) noexcept (true) __asm__ ("" "__isoc99_vsscanf")


     __attribute__ ((__format__ (__scanf__, 2, 0)));
# 513 "/usr/include/stdio.h" 3 4
extern int fgetc (FILE *__stream);
extern int getc (FILE *__stream);


extern int getchar (void);


extern int getc_unlocked (FILE *__stream);
extern int getchar_unlocked (void);
# 538 "/usr/include/stdio.h" 3 4
extern int fgetc_unlocked (FILE *__stream);
# 549 "/usr/include/stdio.h" 3 4
extern int fputc (int __c, FILE *__stream);
extern int putc (int __c, FILE *__stream);


extern int putchar (int __c);
# 565 "/usr/include/stdio.h" 3 4
extern int fputc_unlocked (int __c, FILE *__stream);


extern int putc_unlocked (int __c, FILE *__stream);
extern int putchar_unlocked (int __c);


extern int getw (FILE *__stream);


extern int putw (int __w, FILE *__stream);


extern char *fgets (char *__restrict __s, int __n, FILE *__restrict __stream)
                                                         ;
# 605 "/usr/include/stdio.h" 3 4
extern char *gets (char *__s) __attribute__ ((__deprecated__));
# 615 "/usr/include/stdio.h" 3 4
extern char *fgets_unlocked (char *__restrict __s, int __n,
        FILE *__restrict __stream)
                                                  ;
# 632 "/usr/include/stdio.h" 3 4
extern __ssize_t __getdelim (char **__restrict __lineptr,
                             size_t *__restrict __n, int __delimiter,
                             FILE *__restrict __stream) ;
extern __ssize_t getdelim (char **__restrict __lineptr,
                           size_t *__restrict __n, int __delimiter,
                           FILE *__restrict __stream) ;


extern __ssize_t getline (char **__restrict __lineptr,
                          size_t *__restrict __n,
                          FILE *__restrict __stream) ;


extern int fputs (const char *__restrict __s, FILE *__restrict __stream);


extern int puts (const char *__s);


extern int ungetc (int __c, FILE *__stream);


extern size_t fread (void *__restrict __ptr, size_t __size,
       size_t __n, FILE *__restrict __stream) ;


extern size_t fwrite (const void *__restrict __ptr, size_t __size,
        size_t __n, FILE *__restrict __s);
# 691 "/usr/include/stdio.h" 3 4
extern int fputs_unlocked (const char *__restrict __s,
      FILE *__restrict __stream);
# 702 "/usr/include/stdio.h" 3 4
extern size_t fread_unlocked (void *__restrict __ptr, size_t __size,
         size_t __n, FILE *__restrict __stream) ;
extern size_t fwrite_unlocked (const void *__restrict __ptr, size_t __size,
          size_t __n, FILE *__restrict __stream);


extern int fseek (FILE *__stream, long int __off, int __whence);


extern long int ftell (FILE *__stream) ;


extern void rewind (FILE *__stream);
# 736 "/usr/include/stdio.h" 3 4
extern int fseeko (FILE *__stream, __off_t __off, int __whence);


extern __off_t ftello (FILE *__stream) ;
# 760 "/usr/include/stdio.h" 3 4
extern int fgetpos (FILE *__restrict __stream, fpos_t *__restrict __pos);


extern int fsetpos (FILE *__stream, const fpos_t *__pos);
# 779 "/usr/include/stdio.h" 3 4
extern int fseeko64 (FILE *__stream, __off64_t __off, int __whence);
extern __off64_t ftello64 (FILE *__stream) ;
extern int fgetpos64 (FILE *__restrict __stream, fpos64_t *__restrict __pos);
extern int fsetpos64 (FILE *__stream, const fpos64_t *__pos);


extern void clearerr (FILE *__stream) noexcept (true);

extern int feof (FILE *__stream) noexcept (true) ;

extern int ferror (FILE *__stream) noexcept (true) ;


extern void clearerr_unlocked (FILE *__stream) noexcept (true);
extern int feof_unlocked (FILE *__stream) noexcept (true) ;
extern int ferror_unlocked (FILE *__stream) noexcept (true) ;


extern void perror (const char *__s);


extern int fileno (FILE *__stream) noexcept (true) ;


extern int fileno_unlocked (FILE *__stream) noexcept (true) ;
# 823 "/usr/include/stdio.h" 3 4
extern int pclose (FILE *__stream);


extern FILE *popen (const char *__command, const char *__modes)
  __attribute__ ((__malloc__)) ;


extern char *ctermid (char *__s) noexcept (true)
                                     ;


extern char *cuserid (char *__s)
                                     ;


struct obstack;


extern int obstack_printf (struct obstack *__restrict __obstack,
      const char *__restrict __format, ...)
     noexcept (true) __attribute__ ((__format__ (__printf__, 2, 3)));
extern int obstack_vprintf (struct obstack *__restrict __obstack,
       const char *__restrict __format,
       __gnuc_va_list __args)
     noexcept (true) __attribute__ ((__format__ (__printf__, 2, 0)));


extern void flockfile (FILE *__stream) noexcept (true);


extern int ftrylockfile (FILE *__stream) noexcept (true) ;


extern void funlockfile (FILE *__stream) noexcept (true);
# 885 "/usr/include/stdio.h" 3 4
extern int __uflow (FILE *);
extern int __overflow (FILE *, int);
# 902 "/usr/include/stdio.h" 3 4
}
# 3 "vecadd.cu" 2
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/stdlib.h" 1 3
# 4 "vecadd.cu" 2
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/math.h" 1 3
# 36 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/math.h" 3
# 1 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 1 3
# 40 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/cmath" 3
# 37 "/usr/bin/../lib/gcc/x86_64-linux-gnu/12/../../../../include/c++/12/math.h" 2 3

using std::abs;
using std::acos;
using std::asin;
using std::atan;
using std::atan2;
using std::cos;
using std::sin;
using std::tan;
using std::cosh;
using std::sinh;
using std::tanh;
using std::exp;
using std::frexp;
using std::ldexp;
using std::log;
using std::log10;
using std::modf;
using std::pow;
using std::sqrt;
using std::ceil;
using std::fabs;
using std::floor;
using std::fmod;


using std::fpclassify;
using std::isfinite;
using std::isinf;
using std::isnan;
using std::isnormal;
using std::signbit;
using std::isgreater;
using std::isgreaterequal;
using std::isless;
using std::islessequal;
using std::islessgreater;
using std::isunordered;


using std::acosh;
using std::asinh;
using std::atanh;
using std::cbrt;
using std::copysign;
using std::erf;
using std::erfc;
using std::exp2;
using std::expm1;
using std::fdim;
using std::fma;
using std::fmax;
using std::fmin;
using std::hypot;
using std::ilogb;
using std::lgamma;
using std::llrint;
using std::llround;
using std::log1p;
using std::log2;
using std::logb;
using std::lrint;
using std::lround;
using std::nearbyint;
using std::nextafter;
using std::nexttoward;
using std::remainder;
using std::remquo;
using std::rint;
using std::round;
using std::scalbln;
using std::scalbn;
using std::tgamma;
using std::trunc;
# 5 "vecadd.cu" 2

// Absolute tolerance for the host-side check in main(), which compares the
// mean of the result vector against 1.0.
const double epsilon = 1.0e-6;

// Element-wise vector addition kernel: c[i] = a[i] + b[i] for 0 <= i < n.
//
// Launch layout: one-dimensional grid of one-dimensional blocks. Each thread
// handles at most one element, chosen by its flat global x index, so the
// launch has to provide at least n threads in total. Threads whose index is
// at or beyond n do nothing, which makes a partially filled last block safe.
//
// Parameters:
//   a, b  input vectors, read at index i (must hold at least n elements)
//   c     output vector, written at index i (must hold at least n elements)
//   n     number of elements to process; n <= 0 results in no work
__attribute__((global)) void vecAdd(double *a, double *b, double *c, int n)
{
    // Widen to 64-bit signed before multiplying. The built-in index
    // components are 32-bit unsigned, so on an over-provisioned grid the
    // product could wrap or convert to a negative int, slip past the bounds
    // check below and index outside the arrays.
    const long long id = (long long)blockIdx.x*blockDim.x+threadIdx.x;

    // Tail guard: the grid rarely divides n evenly.
    if (id < n)
        c[id] = a[id] + b[id];
}

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise; `what` names the
// operation for the diagnostic message.
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;

    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host driver for the vecAdd kernel.
//
// Fills two host vectors with sin^2(i) and cos^2(i), adds them on the device
// and checks that the mean of the result is within `epsilon` of 1.0.
// Prints "PASS" or "FAIL" on stdout.
//
// Returns 0 when the verification step ran (whether it printed PASS or FAIL)
// and 1 when a host allocation or CUDA runtime call failed; in the latter
// case a diagnostic goes to stderr and "FAIL" is still printed on stdout.
// argc/argv are unused.
int main( int argc, char* argv[] )
{
    (void)argc;
    (void)argv;

    // Problem size and the byte size shared by all six buffers.
    const int n = 100000;
    const size_t bytes = n*sizeof(double);

    // Host input vectors and host output vector.
    double *h_a = (double*)malloc(bytes);
    double *h_b = (double*)malloc(bytes);
    double *h_c = (double*)malloc(bytes);

    // Device buffers start out null so the cleanup below is safe even when an
    // allocation fails part-way through.
    double *d_a = nullptr;
    double *d_b = nullptr;
    double *d_c = nullptr;

    // `ok` short-circuits every later step once anything has failed.
    bool ok = (h_a != nullptr) && (h_b != nullptr) && (h_c != nullptr);
    if (!ok)
        fprintf(stderr, "host allocation of %zu bytes failed\n", bytes);

    ok = ok && cudaSucceeded(cudaMalloc(&d_a, bytes), "cudaMalloc(d_a)");
    ok = ok && cudaSucceeded(cudaMalloc(&d_b, bytes), "cudaMalloc(d_b)");
    ok = ok && cudaSucceeded(cudaMalloc(&d_c, bytes), "cudaMalloc(d_c)");

    if (ok) {
        // sin^2(i) + cos^2(i) == 1, so every output element should be ~1.0.
        for (int i = 0; i < n; i++) {
            h_a[i] = sin(i)*sin(i);
            h_b[i] = cos(i)*cos(i);
        }
    }

    ok = ok && cudaSucceeded(cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice),
                             "cudaMemcpy(h_a -> d_a)");
    ok = ok && cudaSucceeded(cudaMemcpy(d_b, h_b, bytes, cudaMemcpyHostToDevice),
                             "cudaMemcpy(h_b -> d_b)");

    if (ok) {
        const int blockSize = 1024;
        // Integer ceil-division; avoids the float rounding of ceil((float)n/blockSize).
        const int gridSize = (n + blockSize - 1) / blockSize;

        vecAdd<<<gridSize, blockSize>>>(d_a, d_b, d_c, n);

        // A launch does not return a status itself; configuration errors are
        // only visible through cudaGetLastError().
        ok = cudaSucceeded(cudaGetLastError(), "vecAdd launch");
    }

    // Blocking copy back to the host.
    ok = ok && cudaSucceeded(cudaMemcpy(h_c, d_c, bytes, cudaMemcpyDeviceToHost),
                             "cudaMemcpy(d_c -> h_c)");

    if (ok) {
        // Mean of the result vector should be 1.0 within epsilon.
        double sum = 0;
        for (int i = 0; i < n; i++)
            sum += h_c[i];
        sum /= (double)n;

        if (fabs(sum-1.0) < epsilon)
            printf("PASS\n");
        else
            printf("FAIL\n");
    } else {
        printf("FAIL\n");
    }

    // Release device memory; pointers that were never allocated are still null.
    cudaFree(d_a);
    cudaFree(d_b);
    cudaFree(d_c);

    // Release host memory; free() accepts null pointers.
    free(h_a);
    free(h_b);
    free(h_c);

    return ok ? 0 : 1;
}