/*
 *     Copyright (c) 2017-2018, NVIDIA CORPORATION.  All rights reserved.
 *
 * NVIDIA CORPORATION and its licensors retain all intellectual property
 * and proprietary rights in and to this software, related documentation
 * and any modifications thereto.  Any use, reproduction, disclosure or
 * distribution of this software and related documentation without an express
 * license agreement from NVIDIA CORPORATION is strictly prohibited.
 *
 */

/*
 * Integer spellings used by the prototypes in this header for byte counts
 * (__PGI_ULLONG) and async arguments (__PGI_LLONG).  The whole group is
 * skipped when __PGI_ULLONG has already been defined.
 * NOTE(review): both branches are presumably 64 bits wide, given that
 * __PGI_MAX_ULLONG is 2^64 - 1 -- confirm for every supported target.
 */
#if !defined(__PGI_ULLONG)
#  if defined(TARGET_WIN_X8664)
     /* TARGET_WIN_X8664 builds use the long long spellings. */
#    define __PGI_ULLONG unsigned long long
#    define __PGI_LLONG long long
#  else
     /* All other builds use plain long. */
#    define __PGI_ULLONG unsigned long
#    define __PGI_LLONG long
#  endif
#  define __PGI_MAX_ULLONG (18446744073709551615ull)
#endif /* !defined(__PGI_ULLONG) */

#ifndef _DEF_OPENACC
#define _DEF_OPENACC

#ifndef _DEF_ACC_DEVICE_T
#define _DEF_ACC_DEVICE_T
#include "nvhpc_acc_device_type.h"
/*
 * acc_device_t: device-type selector taken by the routines in this header
 * that have a `devtype` parameter.  Every enumerator is an alias for an
 * nvhpc_acc_device_* constant; those constants are not defined in this file
 * (presumably they come from nvhpc_acc_device_type.h, included just above).
 * The _DEF_ACC_DEVICE_T guard keeps the type from being defined twice.
 */
typedef enum {
  acc_device_none = nvhpc_acc_device_none,
  acc_device_default = nvhpc_acc_device_default,
  acc_device_host = nvhpc_acc_device_host,
  acc_device_not_host = nvhpc_acc_device_not_host,
  acc_device_nvidia = nvhpc_acc_device_nvidia,
  /* The three OpenCL names below map onto the RESERVED7..RESERVED9 constants. */
  acc_device_pgi_opencl = nvhpc_acc_device_RESERVED7,
  acc_device_nvidia_opencl = nvhpc_acc_device_RESERVED8,
  acc_device_opencl = nvhpc_acc_device_RESERVED9,
  acc_device_current = nvhpc_acc_device_current
} acc_device_t;
#endif
#ifndef _DEF_ACC_DEVICE_PROPERTY_T
#define _DEF_ACC_DEVICE_PROPERTY_T
/*
 * acc_device_property_t: type of the `property` argument of
 * acc_get_property() (returns size_t) and acc_get_property_string()
 * (returns const char *), both declared in this header.  The numeric values
 * 0..5 are fixed explicitly.
 * NOTE(review): which selectors are meant for which of the two routines is
 * not visible here -- confirm against the OpenACC specification.
 */
typedef enum {
  acc_property_none = 0,
  acc_property_memory = 1,
  acc_property_free_memory = 2,
  acc_property_name = 3,
  acc_property_vendor = 4,
  acc_property_driver = 5
} acc_device_property_t;
#endif
#ifndef _DEF_ACCX_MEM_MODE_T
#define _DEF_ACCX_MEM_MODE_T
/*
 * accx_mem_mode_t: argument type of accx_set_mem_mode(), declared in this
 * header.  The numeric values are fixed explicitly.
 *
 * No comma follows the last enumerator: a trailing comma is only accepted
 * from C99 / C++11 on, and this header may be included by code built in
 * stricter or older language modes.
 */
typedef enum {
  accx_mem_mode_default = 0,
  accx_mem_mode_separated = 1,
  accx_mem_mode_unified = 2
} accx_mem_mode_t;
#endif
#ifndef _DEF_ACCX_MEM_HINTS_T
#define _DEF_ACCX_MEM_HINTS_T
/* accx_mem_hints_t: argument type of accx_set_mem_hints(). */
typedef enum {
  /* Runtime decides; can be set with an environment variable. */
  accx_mem_hints_default = 0,
  /* Memory hints disabled. */
  accx_mem_hints_disable = 1,
  /* Memory hints enabled at explicit data constructs only. */
  accx_mem_hints_enable_explicit = 2,
  /* Memory hints enabled at explicit and implicit data constructs. */
  accx_mem_hints_enable_all = 3
} accx_mem_hints_t;
#endif /* _DEF_ACCX_MEM_HINTS_T */
#ifndef _DEF_ACCX_MEM_PREFETCH_T
#define _DEF_ACCX_MEM_PREFETCH_T
/* accx_mem_prefetch_t: argument type of accx_set_mem_prefetch(). */
typedef enum {
  /* Runtime decides; can be set with an environment variable. */
  accx_mem_prefetch_default = 0,
  /* Memory prefetch disabled. */
  accx_mem_prefetch_disable = 1,
  /* Memory prefetch enabled at update data constructs only. */
  accx_mem_prefetch_enable_update = 2,
  /* Memory prefetch enabled at explicit data constructs only. */
  accx_mem_prefetch_enable_explicit = 3,
  /* Memory prefetch enabled at explicit and implicit data constructs. */
  accx_mem_prefetch_enable_all = 4
} accx_mem_prefetch_t;
#endif /* _DEF_ACCX_MEM_PREFETCH_T */
#ifndef _DEF_ACCX_MEM_ADVISE_T
#define _DEF_ACCX_MEM_ADVISE_T
/*
 * accx_mem_advise_t: type of the `advice` argument of accx_mem_advise(),
 * declared in this header.  The enumerators come in set/unset pairs.
 *
 * Values are spelled out (they equal the implicit 0..5 numbering), and no
 * comma follows the last enumerator: a trailing comma is only accepted from
 * C99 / C++11 on, and this header may be included in older language modes.
 */
typedef enum {
  accx_mem_advise_set_read_mostly = 0,
  accx_mem_advise_unset_read_mostly = 1,
  accx_mem_advise_set_preferred_location = 2,
  accx_mem_advise_unset_preferred_location = 3,
  accx_mem_advise_set_accessed_by = 4,
  accx_mem_advise_unset_accessed_by = 5
} accx_mem_advise_t;
#endif
#ifndef _DEF_ACCX_MEM_ADVISE_LOCATION_T
#define _DEF_ACCX_MEM_ADVISE_LOCATION_T
/* accx_mem_advise_location_t: type of the `location` argument of accx_mem_advise(). */
typedef enum {
  /* Location not set. */
  accx_mem_advise_location_none = 0,
  /* Location is the host. */
  accx_mem_advise_location_host = 1,
  /* Location is the device. */
  accx_mem_advise_location_device = 2
} accx_mem_advise_location_t;
#endif /* _DEF_ACCX_MEM_ADVISE_LOCATION_T */
#ifndef _DEF_ACCX_MEM_PREFETCH_LOCATION_T
#define _DEF_ACCX_MEM_PREFETCH_LOCATION_T
/* accx_mem_prefetch_location_t: type of the `location` argument of accx_mem_prefetch(). */
typedef enum {
  /* Prefetch location is the host. */
  accx_mem_prefetch_location_host = 0,
  /* Prefetch location is the device. */
  accx_mem_prefetch_location_device = 1
} accx_mem_prefetch_location_t;
#endif /* _DEF_ACCX_MEM_PREFETCH_LOCATION_T */
#ifdef __cplusplus
extern "C" {
#endif

/* <stdio.h> is included for size_t, which some prototypes below use. */
#include <stdio.h>

/*
 * Default-async accessors and the host-only switch.  Declarations only; the
 * definitions are not part of this header.
 */
extern void acc_set_default_async(int async);
extern int acc_get_default_async(void);
/*
 * Declared with (void) rather than (): in C an empty parameter list leaves
 * the parameters unspecified, so a call with stray arguments would compile
 * unchecked.  This also matches every other no-argument routine declared in
 * this header.
 */
extern void acc_set_host_only(void);
/*
 * Device query, selection, property, init and shutdown entry points.
 * Declarations only.  Devices are addressed either by an acc_device_t
 * `devtype`, by a (`devnum`, `devtype`) pair, or by a single int `devid`
 * (acc_set_deviceid / acc_get_deviceid).
 * NOTE(review): the numbering conventions for `devnum` and `devid` are not
 * visible in this header -- confirm against the runtime documentation.
 */
extern int acc_get_num_devices(acc_device_t devtype);
extern acc_device_t acc_get_device(void);
extern void acc_set_device_num(int devnum, acc_device_t devtype);
extern int acc_get_device_num(acc_device_t devtype);
/* Property queries: one returns a size_t, the other a const char *. */
extern size_t acc_get_property(int devnum, acc_device_t devtype,
                               acc_device_property_t property);
extern const char *acc_get_property_string(int devnum, acc_device_t devtype,
                                           acc_device_property_t property);
extern void acc_init(acc_device_t devtype);
extern void acc_init_device(int devnum, acc_device_t devtype);
extern void acc_shutdown(acc_device_t devtype);
extern void acc_shutdown_device(int devnum, acc_device_t devtype);
extern void acc_set_deviceid(int devid);
extern int acc_get_deviceid(int devnum, acc_device_t devtype);
/*
 * Async test/wait entry points, plus acc_on_device and acc_free.
 * Declarations only.  The scalar `async` / `arg` parameters are passed as
 * __PGI_LLONG; acc_wait_any is the exception and takes `count` together with
 * an int * named `async`.
 * NOTE(review): the meaning of the int return values (test result, index)
 * is not visible here -- confirm against the OpenACC specification.
 */
extern int acc_async_test(__PGI_LLONG async);
extern int acc_async_test_all(void);
extern void acc_async_wait(__PGI_LLONG async);
extern void acc_async_wait_all(void);
extern void acc_wait(__PGI_LLONG async);
extern void acc_wait_async(__PGI_LLONG arg, __PGI_LLONG async);
extern void acc_wait_all(void);
extern void acc_wait_all_async(__PGI_LLONG async);
extern int acc_wait_any(int count, int *async);
extern int acc_on_device(acc_device_t devtype);
/* The pointer parameter is unnamed in this prototype. */
extern void acc_free(void *);

/*
 * Data routines taking a pointer and a byte count.  Declarations only.
 *
 * Pattern visible in the prototypes below:
 *   - every routine has an `_async` twin that appends a __PGI_LLONG `async`
 *     argument and is otherwise declared identically;
 *   - byte counts are passed as __PGI_ULLONG;
 *   - acc_memcpy and the copyin / create families return void *, while the
 *     copyout / delete / update families return void;
 *   - the p* and present_or_* spellings share the signature of the
 *     corresponding plain routine.
 * NOTE(review): what the returned void * refers to is not visible in this
 * header -- confirm against the OpenACC specification.
 */
extern void *acc_memcpy(void *targetptr, void *srcptr, __PGI_ULLONG bytes);
extern void *acc_memcpy_async(void *targetptr, void *srcptr, __PGI_ULLONG bytes,
                              __PGI_LLONG async);
/* copyin family */
extern void *acc_copyin(void *hostptr, __PGI_ULLONG bytes);
extern void *acc_copyin_async(void *hostptr, __PGI_ULLONG bytes,
                              __PGI_LLONG async);
extern void *acc_pcopyin(void *hostptr, __PGI_ULLONG bytes);
extern void *acc_pcopyin_async(void *hostptr, __PGI_ULLONG bytes,
                               __PGI_LLONG async);
extern void *acc_present_or_copyin(void *hostptr, __PGI_ULLONG bytes);
extern void *acc_present_or_copyin_async(void *hostptr, __PGI_ULLONG bytes,
                                         __PGI_LLONG async);
/* create family */
extern void *acc_create(void *hostptr, __PGI_ULLONG bytes);
extern void *acc_create_async(void *hostptr, __PGI_ULLONG bytes,
                              __PGI_LLONG async);
extern void *acc_pcreate(void *hostptr, __PGI_ULLONG bytes);
extern void *acc_pcreate_async(void *hostptr, __PGI_ULLONG bytes,
                               __PGI_LLONG async);
extern void *acc_present_or_create(void *hostptr, __PGI_ULLONG bytes);
extern void *acc_present_or_create_async(void *hostptr, __PGI_ULLONG bytes,
                                         __PGI_LLONG async);
/* copyout family (plain and _finalize variants) */
extern void acc_copyout(void *hostptr, __PGI_ULLONG bytes);
extern void acc_copyout_async(void *hostptr, __PGI_ULLONG bytes,
                              __PGI_LLONG async);
extern void acc_copyout_finalize(void *hostptr, __PGI_ULLONG bytes);
extern void acc_copyout_finalize_async(void *hostptr, __PGI_ULLONG bytes,
                                       __PGI_LLONG async);
/* delete family (plain and _finalize variants) */
extern void acc_delete(void *hostptr, __PGI_ULLONG bytes);
extern void acc_delete_async(void *hostptr, __PGI_ULLONG bytes,
                             __PGI_LLONG async);
extern void acc_delete_finalize(void *hostptr, __PGI_ULLONG bytes);
extern void acc_delete_finalize_async(void *hostptr, __PGI_ULLONG bytes,
                                      __PGI_LLONG async);
/* update family: device, self and host spellings */
extern void acc_update_device(void *hostptr, __PGI_ULLONG bytes);
extern void acc_update_device_async(void *hostptr, __PGI_ULLONG bytes,
                                    __PGI_LLONG async);
extern void acc_update_self(void *hostptr, __PGI_ULLONG bytes);
extern void acc_update_self_async(void *hostptr, __PGI_ULLONG bytes,
                                  __PGI_LLONG async);
extern void acc_update_host(void *hostptr, __PGI_ULLONG bytes);
extern void acc_update_host_async(void *hostptr, __PGI_ULLONG bytes,
                                  __PGI_LLONG async);
/*
 * Explicit memcpy-style routines.  Declarations only.  In each prototype the
 * destination pointer is the first parameter and the source pointer the
 * second (devptr/hostptr, hostptr/devptr, targetdevptr/srcdevptr, dst/src);
 * sizes are __PGI_ULLONG and the `_async` twins append a __PGI_LLONG.
 */
extern void acc_memcpy_to_device(void *devptr, void *hostptr,
                                 __PGI_ULLONG bytes);
extern void acc_memcpy_to_device_async(void *devptr, void *hostptr,
                                       __PGI_ULLONG bytes, __PGI_LLONG async);
extern void acc_memcpy_from_device(void *hostptr, void *devptr,
                                   __PGI_ULLONG bytes);
extern void acc_memcpy_from_device_async(void *hostptr, void *devptr,
                                         __PGI_ULLONG bytes, __PGI_LLONG async);
/* Unlike the two pairs above, the device-to-device pair returns void *. */
extern void *acc_memcpy_device(void *targetdevptr, void *srcdevptr,
                               __PGI_ULLONG bytes);
extern void *acc_memcpy_device_async(void *targetdevptr, void *srcdevptr,
                                     __PGI_ULLONG bytes, __PGI_LLONG async);
/*
 * d2d variants additionally name a destination and a source device as ints;
 * the async form's queue argument is called `srcasync`.
 * NOTE(review): presumably dstdev/srcdev are device numbers and the queue
 * belongs to the source device -- confirm.
 */
extern void acc_memcpy_d2d(void* dst, void* src, __PGI_ULLONG sz,
                           int dstdev, int srcdev);
extern void acc_memcpy_d2d_async(void* dst, void* src, __PGI_ULLONG sz,
                                 int dstdev, int srcdev, __PGI_LLONG srcasync);
/*
 * Attach / detach entry points.  Declarations only.  Each takes the address
 * of a pointer (`void **hostptrptr`); the `_async` forms append a
 * __PGI_LLONG `async` argument, and detach also has `_finalize` forms.
 */
extern void acc_attach(void **hostptrptr);
extern void acc_attach_async(void **hostptrptr, __PGI_LLONG async);
extern void acc_detach(void **hostptrptr);
extern void acc_detach_async(void **hostptrptr, __PGI_LLONG async);
extern void acc_detach_finalize(void **hostptrptr);
extern void acc_detach_finalize_async(void **hostptrptr, __PGI_LLONG async);
/*
 * Comparison entry points.  Declarations only.
 * acc_compare takes a host pointer and a signed __PGI_LLONG `count`;
 * acc_compare_all takes no arguments.
 */
extern void acc_compare(void *hostptr, __PGI_LLONG count);
extern void acc_compare_all(void);
/*
 * pgi_compare and pcast_compare have identical parameter lists: a const host
 * pointer, a data-type name string, an unsigned __PGI_ULLONG `count`, and
 * variable / file / function names plus a line number.  Both return size_t.
 * NOTE(review): the meaning of the returned size_t is not visible here --
 * confirm against the PCAST documentation.
 */
extern size_t pgi_compare(const void *hostptr, const char *dtypename,
                          __PGI_ULLONG count, const char *varname,
                          const char *filename, const char *funcname,
                          int linenum);
extern size_t pcast_compare(const void *hostptr, const char *dtypename,
                            __PGI_ULLONG count, const char *varname,
                            const char *filename, const char *funcname,
                            int linenum);

/*
 * Further runtime entry points: device-type get/set, acc_malloc, host/device
 * pointer lookup, map/unmap, presence queries, updatein/updateout, and core
 * count get/set.  Declarations only.
 */
extern void acc_set_device_type(acc_device_t devtype);
extern acc_device_t acc_get_device_type(void);
/* The __PGI_ULLONG parameter is unnamed here.
 * NOTE(review): presumably a byte count, by analogy with the `bytes`
 * parameters elsewhere in this header -- confirm. */
extern void *acc_malloc(__PGI_ULLONG);
extern void *acc_deviceptr(void *hostptr);
extern void *acc_hostptr(void *devptr);
extern void acc_map_data(void *hostptr, void *devptr, __PGI_ULLONG bytes);
extern void acc_unmap_data(void *hostptr);
extern int acc_is_present(void *hostptr, __PGI_ULLONG bytes);
extern int acc_present_count(void *hostptr);
/* updatein / updateout follow the (hostptr, bytes[, async]) shape. */
extern void acc_updatein(void *hostptr, __PGI_ULLONG bytes);
extern void acc_updatein_async(void *hostptr, __PGI_ULLONG bytes,
                               __PGI_LLONG async);
extern void acc_updateout(void *hostptr, __PGI_ULLONG bytes);
extern void acc_updateout_async(void *hostptr, __PGI_ULLONG bytes,
                                __PGI_LLONG async);
extern void acc_set_num_cores(int numcores);
extern int acc_get_num_cores(void);

/*
 * CUDA and OpenCL accessors.  Declarations only.  Contexts, streams, queues
 * and the OpenCL device are exchanged as untyped void *; the CUDA device is
 * returned as an int.  Several parameters are unnamed.
 * NOTE(review): the unnamed __PGI_LLONG parameters are presumably async
 * values -- confirm.
 */
extern void *acc_get_current_cuda_context(void);
extern int acc_get_current_cuda_device(void);
extern void *acc_get_cuda_stream(__PGI_LLONG);
extern void acc_set_cuda_stream(__PGI_LLONG, void *);
extern void *acc_cuda_get_context(int); /* PGI: get context for given device id */
extern int acc_cuda_get_device(int);    /* PGI: get device for given device id */

extern void *acc_get_current_opencl_context(void);
extern void *acc_get_current_opencl_device(void);
extern void *acc_get_opencl_queue(__PGI_LLONG);

/*
 * PGI: all of the atomic functions are now supported on multicore as well as
 * on the device.
 *
 * Naming scheme visible in the prototypes: atomic<op><suffix>, where the
 * suffix matches the value type -- i = int, u = unsigned int,
 * ul = unsigned long long, f = float, d = double.  The target is always
 * passed as `void *address`, and each routine returns the value type.
 * NOTE(review): what the returned value represents is not visible here --
 * confirm against the runtime documentation.
 */
/* add: i, u, ul, f, d */
extern int atomicaddi(void *address, int val);
extern unsigned int atomicaddu(void *address, unsigned int val);
extern unsigned long long atomicaddul(void *address, unsigned long long val);
extern float atomicaddf(void *address, float val);
extern double atomicaddd(void *address, double val);

/* sub: i, u, ul, f, d */
extern int atomicsubi(void *address, int val);
extern unsigned int atomicsubu(void *address, unsigned int val);
extern unsigned long long atomicsubul(void *address, unsigned long long val);
extern float atomicsubf(void *address, float val);
extern double atomicsubd(void *address, double val);

/* max: i, u, ul, f, d */
extern int atomicmaxi(void *address, int val);
extern unsigned int atomicmaxu(void *address, unsigned int val);
extern unsigned long long atomicmaxul(void *address, unsigned long long val);
extern float atomicmaxf(void *address, float val);
extern double atomicmaxd(void *address, double val);

/* min: i, u, ul, f, d */
extern int atomicmini(void *address, int val);
extern unsigned int atomicminu(void *address, unsigned int val);
extern unsigned long long atomicminul(void *address, unsigned long long val);
extern float atomicminf(void *address, float val);
extern double atomicmind(void *address, double val);

/* and: integer variants only */
extern int atomicandi(void *address, int val);
extern unsigned int atomicandu(void *address, unsigned int val);
extern unsigned long long atomicandul(void *address, unsigned long long val);

/* or: integer variants only */
extern int atomicori(void *address, int val);
extern unsigned int atomicoru(void *address, unsigned int val);
extern unsigned long long atomicorul(void *address, unsigned long long val);

/* xor: integer variants only */
extern int atomicxori(void *address, int val);
extern unsigned int atomicxoru(void *address, unsigned int val);
extern unsigned long long atomicxorul(void *address, unsigned long long val);

/* exch: i, u, ul, f, d */
extern int atomicexchi(void *address, int val);
extern unsigned int atomicexchu(void *address, unsigned int val);
extern unsigned long long atomicexchul(void *address, unsigned long long val);
extern float atomicexchf(void *address, float val);
extern double atomicexchd(void *address, double val);

/* inc / dec: unsigned int only */
extern unsigned int atomicincu(void *address, unsigned int val);

extern unsigned int atomicdecu(void *address, unsigned int val);

/* cas: i, u, ul, f, d; takes two values (val, val2) besides the address */
extern int atomiccasi(void *address, int val, int val2);
extern unsigned int atomiccasu(void *address, unsigned int val,
                               unsigned int val2);
extern unsigned long long atomiccasul(void *address, unsigned long long val,
                                      unsigned long long val2);
extern float atomiccasf(void *address, float val, float val2);
extern double atomiccasd(void *address, double val, double val2);

/*
 * Index queries.  Declarations only; all return int.  The gang / worker /
 * vector forms take no arguments, while the block / thread forms take one
 * unnamed int.
 * NOTE(review): that int presumably selects a dimension -- confirm.
 */
extern int __pgi_gangidx(void);
extern int __pgi_workeridx(void);
extern int __pgi_vectoridx(void);
extern int __pgi_blockidx(int);
extern int __pgi_threadidx(int);

/* Implementation defined functions */
/* Setters for debug, synchronous, memory mode, hints and prefetch. */
extern void accx_set_debug(int debug);
extern void accx_set_synchronous(int synchronous);
extern void accx_set_mem_mode(accx_mem_mode_t value);
extern void accx_set_mem_hints(accx_mem_hints_t value);
extern void accx_set_mem_prefetch(accx_mem_prefetch_t value);
/* Advise / prefetch calls on the range given by `ptr` and `bytes`; only the
 * prefetch call takes an `async` argument. */
extern void accx_mem_advise(accx_mem_advise_t advice,
                            accx_mem_advise_location_t location,
                            void *ptr, __PGI_ULLONG bytes);
extern void accx_mem_prefetch(accx_mem_prefetch_location_t location,
                              void *ptr,  __PGI_ULLONG bytes,
                              __PGI_LLONG async);
/*
 * acc_graph_t pairs two untyped handles, `graph` and `graph_exec`.
 * NOTE(review): presumably a CUDA graph and its executable instance --
 * confirm.
 */
typedef struct { void *graph; void *graph_exec; } acc_graph_t;
/*
 * Capture / launch entry points come in two flavours: one working on a
 * caller-supplied acc_graph_t *, and an `_auto` one keyed by a __PGI_LLONG
 * `checksum` instead.
 */
extern void accx_async_begin_capture(__PGI_LLONG async);
extern void accx_async_begin_capture_auto(__PGI_LLONG checksum, __PGI_LLONG async);
extern void accx_async_end_capture(__PGI_LLONG async, acc_graph_t *pgraph);
extern void accx_async_end_capture_auto(__PGI_LLONG checksum, __PGI_LLONG async);
extern void accx_graph_launch(acc_graph_t *pgraph, __PGI_LLONG async);
extern void accx_graph_launch_auto(__PGI_LLONG checksum, __PGI_LLONG async);
extern void accx_graph_delete(acc_graph_t *pgraph);
/* Accessors returning void * for an acc_graph_t. */
extern void *accx_get_graph(acc_graph_t *pgraph);
extern void *accx_get_graph_exec(acc_graph_t *pgraph);

/* For debugging.  Declarations only. */
/* NOTE(review): the meaning of the unnamed int argument and of the long
 * result is not visible in this header -- confirm before relying on it. */
extern long acc_attach_check(int);
extern void acc_attach_dump(void);

#ifdef __cplusplus
}
#endif

#if !defined(__CUDACC__) && !defined(__NVCOMPILER_CUDA__)
#ifdef __cplusplus
#ifndef _HX_OPENACC_MODE
/*
 * atomicAdd overload set (C++ only).  Each overload hands its arguments to
 * the matching type-suffixed routine declared earlier in this header
 * (atomicaddi / atomicaddu / atomicaddul / atomicaddf / atomicaddd) and
 * returns that routine's result unchanged.  The typed pointer converts
 * implicitly to the void * those routines take.
 */
static inline int atomicAdd(int *address, int val)
{
  return atomicaddi(address, val);
}
static inline unsigned int atomicAdd(unsigned int *address, unsigned int val)
{
  return atomicaddu(address, val);
}
static inline unsigned long long atomicAdd(unsigned long long *address,
                                           unsigned long long val)
{
  return atomicaddul(address, val);
}
static inline float atomicAdd(float *address, float val)
{
  return atomicaddf(address, val);
}
static inline double atomicAdd(double *address, double val)
{
  return atomicaddd(address, val);
}

/*
 * atomicSub overload set (C++ only).  Each overload forwards to the matching
 * type-suffixed routine declared earlier in this header (atomicsubi /
 * atomicsubu / atomicsubul / atomicsubf / atomicsubd) and returns its result
 * unchanged.  The typed pointer converts implicitly to void *.
 */
static inline int atomicSub(int *address, int val)
{
  return atomicsubi(address, val);
}
static inline unsigned int atomicSub(unsigned int *address, unsigned int val)
{
  return atomicsubu(address, val);
}
static inline unsigned long long atomicSub(unsigned long long *address,
                                           unsigned long long val)
{
  return atomicsubul(address, val);
}
static inline float atomicSub(float *address, float val)
{
  return atomicsubf(address, val);
}
static inline double atomicSub(double *address, double val)
{
  return atomicsubd(address, val);
}

/*
 * atomicMax overload set (C++ only).  Each overload forwards to the matching
 * type-suffixed routine declared earlier in this header (atomicmaxi /
 * atomicmaxu / atomicmaxul / atomicmaxf / atomicmaxd) and returns its result
 * unchanged.  The typed pointer converts implicitly to void *.
 */
static inline int atomicMax(int *address, int val)
{
  return atomicmaxi(address, val);
}
static inline unsigned int atomicMax(unsigned int *address, unsigned int val)
{
  return atomicmaxu(address, val);
}
static inline unsigned long long atomicMax(unsigned long long *address,
                                           unsigned long long val)
{
  return atomicmaxul(address, val);
}
static inline float atomicMax(float *address, float val)
{
  return atomicmaxf(address, val);
}
static inline double atomicMax(double *address, double val)
{
  return atomicmaxd(address, val);
}

/*
 * atomicMin overload set (C++ only).  Each overload forwards to the matching
 * type-suffixed routine declared earlier in this header (atomicmini /
 * atomicminu / atomicminul / atomicminf / atomicmind) and returns its result
 * unchanged.  The typed pointer converts implicitly to void *.
 */
static inline int atomicMin(int *address, int val)
{
  return atomicmini(address, val);
}
static inline unsigned int atomicMin(unsigned int *address, unsigned int val)
{
  return atomicminu(address, val);
}
static inline unsigned long long atomicMin(unsigned long long *address,
                                           unsigned long long val)
{
  return atomicminul(address, val);
}
static inline float atomicMin(float *address, float val)
{
  return atomicminf(address, val);
}
static inline double atomicMin(double *address, double val)
{
  return atomicmind(address, val);
}

/* atomicInc (C++ only): forwards to atomicincu, declared earlier in this
 * header, and returns its result.  Only an unsigned int form exists. */
static inline unsigned int atomicInc(unsigned int *address, unsigned int val)
{
  return atomicincu(address, val);
}

/* atomicDec (C++ only): forwards to atomicdecu, declared earlier in this
 * header, and returns its result.  Only an unsigned int form exists. */
static inline unsigned int atomicDec(unsigned int *address, unsigned int val)
{
  return atomicdecu(address, val);
}

/*
 * atomicAnd overload set (C++ only, integer types).  Each overload forwards
 * to atomicandi / atomicandu / atomicandul, declared earlier in this header,
 * and returns the result unchanged.
 */
static inline int atomicAnd(int *address, int val)
{
  return atomicandi(address, val);
}
static inline unsigned int atomicAnd(unsigned int *address, unsigned int val)
{
  return atomicandu(address, val);
}
static inline unsigned long long atomicAnd(unsigned long long *address,
                                           unsigned long long val)
{
  return atomicandul(address, val);
}

/*
 * atomicOr overload set (C++ only, integer types).  Each overload forwards
 * to atomicori / atomicoru / atomicorul, declared earlier in this header,
 * and returns the result unchanged.
 */
static inline int atomicOr(int *address, int val)
{
  return atomicori(address, val);
}
static inline unsigned int atomicOr(unsigned int *address, unsigned int val)
{
  return atomicoru(address, val);
}
static inline unsigned long long atomicOr(unsigned long long *address,
                                          unsigned long long val)
{
  return atomicorul(address, val);
}

/*
 * atomicXor overload set (C++ only, integer types).  Each overload forwards
 * to atomicxori / atomicxoru / atomicxorul, declared earlier in this header,
 * and returns the result unchanged.
 */
static inline int atomicXor(int *address, int val)
{
  return atomicxori(address, val);
}
static inline unsigned int atomicXor(unsigned int *address, unsigned int val)
{
  return atomicxoru(address, val);
}
static inline unsigned long long atomicXor(unsigned long long *address,
                                           unsigned long long val)
{
  return atomicxorul(address, val);
}

/*
 * atomicExch overload set (C++ only).  Each overload forwards to the
 * matching type-suffixed routine declared earlier in this header
 * (atomicexchi / atomicexchu / atomicexchul / atomicexchf / atomicexchd) and
 * returns its result unchanged.  The typed pointer converts implicitly to
 * void *.
 */
static inline int atomicExch(int *address, int val)
{
  return atomicexchi(address, val);
}
static inline unsigned int atomicExch(unsigned int *address, unsigned int val)
{
  return atomicexchu(address, val);
}
static inline unsigned long long atomicExch(unsigned long long *address,
                                            unsigned long long val)
{
  return atomicexchul(address, val);
}
static inline float atomicExch(float *address, float val)
{
  return atomicexchf(address, val);
}
static inline double atomicExch(double *address, double val)
{
  return atomicexchd(address, val);
}

/*
 * atomicCAS overload set (C++ only).  Each overload passes the address plus
 * val1 and val2, in that order, to the matching type-suffixed routine
 * declared earlier in this header (atomiccasi / atomiccasu / atomiccasul /
 * atomiccasf / atomiccasd) and returns its result unchanged.  The typed
 * pointer converts implicitly to void *.
 */
static inline int atomicCAS(int *address, int val1, int val2)
{
  return atomiccasi(address, val1, val2);
}
static inline unsigned int atomicCAS(unsigned int *address, unsigned int val1,
                                     unsigned int val2)
{
  return atomiccasu(address, val1, val2);
}
static inline unsigned long long atomicCAS(unsigned long long *address,
                                           unsigned long long val1,
                                           unsigned long long val2)
{
  return atomiccasul(address, val1, val2);
}
static inline float atomicCAS(float *address, float val1, float val2)
{
  return atomiccasf(address, val1, val2);
}
static inline double atomicCAS(double *address, double val1, double val2)
{
  return atomiccasd(address, val1, val2);
}
#endif /* _HX_OPENACC_MODE */
#endif /* __cplusplus */
#endif /* !defined(__CUDACC__) && !defined(__NVCOMPILER_CUDA__) */

/*
 * Named negative constants.
 * NOTE(review): presumably special values for the `async` arguments of the
 * routines declared in this header -- confirm against the OpenACC
 * specification.
 *
 * Each replacement list is parenthesized so the negative literal stays a
 * single operand wherever the macro is expanded.
 */
#define acc_async_sync (-1)
#define acc_async_default (-3)
#define acc_async_noval (-4)

#endif
