#
#     Copyright (c) 2017-2019, NVIDIA CORPORATION.  All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto.  Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
#

# Include information about the supported compute capabilities for
# each architecture (cinclude includes a file conditionally; each of
# these files exists only on its own architecture)
# Target-specific rc files. Each is pulled in with cinclude, so a file that
# is not present for the current target is presumably just skipped.
cinclude ../rcfiles/acclin8664rc;
cinclude ../rcfiles/acclinuxarm64rc;
cinclude ../rcfiles/acclinuxpower64rc;
cinclude ../rcfiles/accwin64rc;
# NOTE(review): NVVMLATESTVERSION (used by NVVMVERSION below) is not defined
# in this file; presumably it comes from one of these nvvmversion*rc files.
cinclude ../rcfiles/nvvmversionlinx8664rc;
cinclude ../rcfiles/nvvmversionaarch64rc;
cinclude ../rcfiles/nvvmversionwinx8664rc;

# nvdd was moved to bin/tools directory, so we need the "../" to get to bin/rcfiles.
# This one uses plain include rather than cinclude.
include ../rcfiles/cudaselectrc;

# NVDD cross-platform configuration
#
# Snapshot of the caller's PATH. NEWWINPATH puts the NVVM bin directory
# (NVVMWINBINDIR, defined further down) in front of it, joined with ';'.
variable PATH is environment(PATH);
variable NEWWINPATH is default($NVVMWINBINDIR;$PATH);
variable NEWPATH is default($PATH);

# LIBRARY path settings
# Each NEW* variable defaults to the value read from the environment.
variable LDLIB is environment(LD_LIBRARY_PATH);
variable NEWLDLIB is default($LDLIB);

variable DYLDLIB is environment(DYLD_LIBRARY_PATH);
variable NEWDYLDLIB is default($DYLDLIB);

# NOTE(review): GENMODID is not referenced in the visible part of this file.
variable GENMODID is default(1);
# When NOPTXCU is non-zero the ptx command omits its -m$CUWIDTH argument.
variable NOPTXCU is default(0);
# Overridden to 1700 by the -ot12 / -ot14 switches.
variable MSC_VER is default(1400);

# TODO: can we detect this instead of hard coding it? It is the oldest CC
# we support, which is described in acc<target>rc SYSCAP.
# COMPUTECAP feeds SMCAP (sm_NN); CCOMPUTECAP feeds COMPCAP (compute_NN)
# and CUDAARCH. The -cc$n and -rcomputecap=n switches set both explicitly.
variable COMPUTECAP is default(35);
variable CCOMPUTECAP is default($COMPUTECAP);

# PTXOPT is passed to the ptx command as -O$PTXOPT unless DEBUG2 is set.
variable DEFPTXOPT is default(3);
variable PTXOPT is default($DEFPTXOPT);

variable MAJOR is default(2025) help(Major release version);

# Cache-mode selectors consumed by the ptx command: CACHEL12 adds -dlcm=ca,
# otherwise CACHEL2 adds -dlcm=cg.
variable CACHEL12 is default() help(Set when caching in both Level 1 and 2);
variable CACHEL2 is default() help(Set when caching in Level 2 only);

# NVVM flavour selection. USENVVMDEV / USENVVMNEXT are raised by the
# -nvvm-dev / -nvvm-next switches (only when CUDAXXYY >= 12000).
variable USENVVMDEV is default(0);
variable USENVVMNEXT is default(0);
# Sub-directory of $DRIVERDIR in which the tnvvmas tool is looked up.
variable NVVMCGVER is default(nvvm70);
# NVVM_VERSION from the environment takes precedence over NVVMLATESTVERSION.
variable NVVMVERSIONENV is environment(NVVM_VERSION);
variable NVVMVERSION is default($if($NVVMVERSIONENV,$NVVMVERSIONENV,$NVVMLATESTVERSION));
# Directory name for the bitcode files under `compilers/lib/`, can be `nvvm70`, `nvvm-next`
variable NVVM is default(nvvm70);
variable NVVMBC is default($if($lor($USENVVMNEXT,$USENVVMDEV),nvvm-next,$NVVM));
# NOTE(review): CUDAXXYY is not defined in this file; presumably it is set by
# cudaselectrc. The index/length semantics of $substr are not visible here.
variable CUDA_MAJOR_VAL is default($substr($CUDAXXYY,0,1));
variable CUDA_MINOR_VAL is default($substr($CUDAXXYY,3,3));
# CUDA_MAJOR is used as a path component for the runtime bitcode files;
# every CUDAXXYY below 12000 maps to 11.
variable CUDA_MAJOR is default($if($expr($CUDAXXYY<12000),11,$(CUDA_MAJOR_VAL)));

# Environment variables to select custom tools from the CUDA toolkit
# (consulted by the tool/variable definitions only when USENODEFAULTCUDA is set).
variable NVVM_PATH_ENV is environment(NVCOMPILER_NVVM_PATH);
variable PTXAS_PATH_ENV is environment(NVCOMPILER_PTXAS_PATH);
variable FATBINARY_PATH_ENV is environment(NVCOMPILER_FATBINARY_PATH);
variable NVLINK_PATH_ENV is environment(NVCOMPILER_NVLINK_PATH);
# Gates the F18 runtime bitcode entry in DEVICE_BITCODE_FILES.
variable USEF18CUDART is default(0);

# Hidden switch: same effect as -usegpubc (sets USEGPUBC=1).
switch -nvvm70 is hide
    help(Use nvvm70 and convert .gpu to .bc)
    set(USEGPUBC=1);

# Hidden switch: select the NVVM development build. Below CUDAXXYY 12000 it
# leaves USENVVMDEV at 0 and NVVM at nvvm70; NVVMVEROPT is set regardless.
switch -nvvm-dev is hide
    help(Use nvvm-dev (from nvvm SOLID kitpick)) 
    set(USENVVMDEV=$if($expr($CUDAXXYY>=12000),1,0))
    set(NVVM=$if($expr($CUDAXXYY>=12000),nvvm-next,nvvm70))
    set(NVVMVEROPT=nvvm-latest);

# Hidden switch: select nvvm-next from the CUDA toolkit.
# NOTE(review): NVVMBC and NVVMCGVER are set to nvvm-next unconditionally,
# while USENVVMNEXT and NVVM are gated on CUDAXXYY >= 12000 — confirm intended.
switch -nvvm-next is hide
    help(Use nvvm-next (from CUDA toolkit))
    set(USENVVMNEXT=$if($expr($CUDAXXYY>=12000),1,0))
    set(NVVM=$if($expr($CUDAXXYY>=12000),nvvm-next,nvvm70))
    set(NVVMBC=nvvm-next)
    set(NVVMCGVER=nvvm-next)
    set(NVVMVEROPT=nvvm-latest);

# Hidden switch: appends -vasp-fix to NVVMFLAGS, which cpgnvvm forwards.
switch -vasp-fix is hide
	help(Enable the -vasp-fix flag in nvvm)
	append(NVVMFLAGS=-vasp-fix);

variable NVVMNEXTISDEFAULT is default(0); # TBD
# Both base-directory names currently default to the same value (nvvm).
variable NVVM70BASEDIR is default(nvvm);
variable NVVMNEXTBASEDIR is default(nvvm);
# Location of NVVM development builds, laid out as <base>/<TARGETSTR>/<version>.
variable NVVMDEVBASE is default(/proj/cuda/nvvm);
variable NVVMPKGDIR is default($NVVMVERSION);
# ...TOOLS is where ptxas/fatbinary/nvlink are looked up when USENVVMDEV is set.
variable NVVMDEVBASEDIRTOOLS is default($NVVMDEVBASE/$TARGETSTR/$NVVMPKGDIR);
variable NVVMDEVBASEDIR is default($NVVMDEVBASE/$TARGETSTR/$NVVMPKGDIR/nvvm);
# Dev tree when USENVVMDEV is set and CUDAXXYY >= 12000; otherwise
# $USECUDAROOT/<NVVMNEXTBASEDIR or NVVM70BASEDIR depending on USENVVMNEXT>.
variable NVVMBASEDIR is default($if($land($USENVVMDEV,$expr($CUDAXXYY>=12000)),$NVVMDEVBASEDIR,$USECUDAROOT/$if($USENVVMNEXT,$NVVMNEXTBASEDIR,$NVVM70BASEDIR)));
variable NVVMOSXLIBDIR is default($NVVMBASEDIR/lib);
variable NVVMLINUXLIBDIRSUFFIX is default(64);
# With USENODEFAULTCUDA and NVCOMPILER_NVVM_PATH set, the library directory
# comes from that path; in every other case it is under NVVMBASEDIR.
variable NVVMLINUXLIBDIR is default($if($USENODEFAULTCUDA,$if($NVVM_PATH_ENV,$NVVM_PATH_ENV/lib$NVVMLINUXLIBDIRSUFFIX,$NVVMBASEDIR/lib$NVVMLINUXLIBDIRSUFFIX),$NVVMBASEDIR/lib$NVVMLINUXLIBDIRSUFFIX));
variable NVVMWINBINDIR is default($NVVMBASEDIR/bin);

# When set, cpgnvvm passes -nvvmverify (only when DEBUGLIBNVVM is not set).
variable NVVMVERIFY is default(0);
switch -nvvmverify is hide
	help(verify nvvm code before compilation)
	set(NVVMVERIFY=1);

# Inputs to the cpgnvvm command line. NVVMNOFLAGS (set by -noflags)
# suppresses the whole -opt/-arch/-ftz/-prec-*/-fma group.
variable NVVMNOFLAGS is default(0);
variable NVVMOPTLEVEL is default(3); #only opt levels 0 and 3 are supported, at least as of cuda 9.2
variable NVVMVEROPT is default() help(flag to set nvvm version (default nvvm70 with no option));
variable NVVMDEBUG is default();
variable NVVMFLAGS is help(flags to nvvm);
variable NVVMLINEINFO is default();
# Tool and library locations. USECUDAROOT, USECUDAFEROOT, CDRIVERDIR, COMPBASE,
# COMPSYS, COMPVER and COMPLIBPREFIX are not defined in this file.
variable NVVMDIR is
	default($NVVMBASEDIR/bin);
variable NVVMCCDIR is
	default($USECUDAROOT/libnvvm);
variable NVOPEN64DIR is
	default($USECUDAROOT/open64/lib);
# Parent directory of CDRIVERDIR; root for include_acc and the lib/ bitcode files.
variable PGICUDAROOT is
	default($dirname($CDRIVERDIR));
variable F18CUDAROOT is
        default($COMPBASE/$COMPSYS/$COMPVER/$(COMPLIBPREFIX)lib);
variable PGICUDAINCDIR is
	default($PGICUDAROOT/include_acc);
# PGICUDAINC_OT is only assigned by the -ot12 / -ot14 switches.
variable PGICUDAINC is
	default($PGICUDAINCDIR $PGICUDAINC_OT);
variable CUDAFEDIR is
	default($USECUDAFEROOT/bin);
variable CUDALIB is
	default($USECUDAROOT/lib64);
variable CUDAINC is
	default($USECUDAROOT/include);
# Three directory levels above COMPBIN, then math_libs/<CUDAVERSION>.
variable CUDAMATHDIR is
	default($dirname($dirname($dirname($COMPBIN)))/math_libs/$CUDAVERSION);
variable CUDAMATHINCDIR is
	default($CUDAMATHDIR/include);
# Debug toggles. In the ptx command DEBUG1 adds --dont-merge-basicblocks,
# DEBUG2 adds -g (and drops -O), DEBUG3 adds --return-at-end, and LINEINFO
# adds --generate-line-info. DEBUG and LINEINFO also shape the fatbin --cmdline.
variable DEBUG is default(0);
variable DEVDEBUG is default(0);
variable DEBUG1 is default(0);
variable DEBUG2 is default(0);
variable DEBUG3 is default(0);
variable LINEINFO is default(0);
# When set, cnvlink adds -allow-undefined-globals.
variable ALLOWUNDEFGBLS is default(0);

# Set to 1 by cpgnvvmdbg; cpgnvvm then skips adding the runtime bitcode files.
variable LLFILEPRELINKED is default(0);
variable INC is		# set by -I
	default();
# System include directories; prepro90 turns each entry into a -I option.
variable STDINC is
	default($CUDAMATHINCDIR $CUDAINC $CUDAINC/cudart $PGICUDAINC);
variable DEF is		# set by -D
	default();
variable DEFFASTMATHMACRO is
	default(-D__USE_FAST_MATH__=0);
variable FASTMATHMACRO is
	default($DEFFASTMATHMACRO);
variable CUDEF23 is
	default($FASTMATHMACRO);
variable CUPRECISE is
	default(__CUDA_PREC_DIV __CUDA_PREC_SQRT);
variable PGIEXT is default();
# Macro lists; prepro90 turns each entry of CUSTDDEF1 into -D<entry>.
# CUSTDDEF2 and CUSTDDEF3 are not referenced in the visible part of this file.
variable CUSTDDEF1 is
	default(__CUDACC__ __NVCC__ $if($FTZ,__CUDA_FTZ) $CUPRECISE $if($index($TARGET,win64,win64-llvm,win64edg-llvm),_USE_DECLSPECS_FOR_SAL) $PGIEXT __CUDACC_VER_MAJOR__=$CUDA_MAJOR_VAL __CUDACC_VER_MINOR__=$CUDA_MINOR_VAL);
variable CUSTDDEF2 is
	default(__CUDACC__ __NVCC__ $if($FTZ,__CUDA_FTZ) $CUPRECISE __CUDANVVM__ $PGIEXT __CUDACC_VER_MAJOR__=$CUDA_MAJOR_VAL __CUDACC_VER_MINOR__=$CUDA_MINOR_VAL);
variable CUSTDDEF3 is
	default(__CUDABE__ $if($FTZ,__CUDA_FTZ) $CUPRECISE $if($index($TARGET,win64,win64-llvm,win64edg-llvm),__GNUC__) __CUDANVVM__ $PGIEXT);
# cicc as of version CUDA 10.2 does not support __float128 or __ieee128 type as built-in. Since we do not generate __float128 in our GPU files,
# ensure preprocessor does not think it is supported and thus generate incompatible code for cicc.
# prepro90 turns each entry into -U<entry>.
variable CUSTUDEF1 is
	default(__FLOAT128__);
variable STDDEF is
	default();

# pgecho on the Windows targets, echo elsewhere. The only tool that used it
# (techo) is commented out below.
variable ECHOBIN is default($if($index($TARGET,win64,win64-llvm,win64edg-llvm),pgecho,echo));

# Set by -dryrun (which also sets KEEPTEMP).
variable DRYRUN is default(0);

# When set, intermediate files are named after the input instead of being temp files.
variable KEEPTEMP is default(0);

# Build-phase selector: with USELLC/USENVVM/USELL all off, 1 picks prepro90 and 0 picks nvvm.
variable CUPREPRO is default(1);

# Headers that prepro90 force-includes via -include; filled in by -preinclude.
variable PREINC is default();

# DOPTXINFO adds -v to ptxas; PTXINFO is where the ptx command sends stderr.
variable DOPTXINFO is default(0);
variable PTXINFO is default();

# Optimization level passed to cicc as -O$CUOPT by the nvvm command.
variable CUOPT is default(3);
variable GOPT is default();

# Flush-to-zero setting, forwarded as -ftz=$FTZ and as the __CUDA_FTZ macro.
variable FTZ is default(0);

# Build-phase pipeline selectors (see phase Build).
variable USELL is default(0);
variable USENVVM is default(0);
# OpenMP offload: these control which nvomp_dev*.bc / *_omp_runtime.bc files
# are added to DEVICE_BITCODE_FILES.
variable USEOMPGPUCG is default(0);
variable USEOMPGPUCGDBG is default();
variable USEOMPGPUCGNVCC is default();
variable OMPTARGETOFFLOAD is default(0);

# Registration-file handling for cnvlink / compreg; set by -regfile / -regobj.
variable REGFILE is default();
variable REGOBJ is default();
variable DOREG is default(0);
variable CUDALINK is default(0);
variable UNIFIEDMEM is default(0);
variable NORDC is default(0);

# libdevice lookup. DEVICE_BITCODE is the first of these files that exists:
#   1. libdevice.compute_<COMPUTECAP>.<vsn>.bc
#   2. libdevice.compute_<DEVICE_BACKUP_CAP>.<vsn>.bc
#   3. libdevice.<vsn>.bc (taken from NVVM_PATH_ENV when USENODEFAULTCUDA is set)
# and is left empty when none of them exists.
variable DEVICE_BITCODEVSN is default(10);
variable DEVICE_BACKUP_CAP is default(30);
variable DEVICE_BITCODE_BACKUP2_FILE is default($if($USENODEFAULTCUDA,$if($NVVM_PATH_ENV,$NVVM_PATH_ENV,$NVVMBASEDIR),$NVVMBASEDIR)/libdevice/libdevice.$DEVICE_BITCODEVSN.bc);
variable DEVICE_BITCODE_BACKUP_FILE is default($NVVMBASEDIR/libdevice/libdevice.compute_$DEVICE_BACKUP_CAP.$DEVICE_BITCODEVSN.bc);
variable DEVICE_BITCODE_FILE is default($NVVMBASEDIR/libdevice/libdevice.compute_$COMPUTECAP.$DEVICE_BITCODEVSN.bc);
variable DEVICE_BITCODE is default($if($isfile($DEVICE_BITCODE_FILE),$DEVICE_BITCODE_FILE,$if($isfile($DEVICE_BITCODE_BACKUP_FILE),$DEVICE_BITCODE_BACKUP_FILE,$if($isfile($DEVICE_BITCODE_BACKUP2_FILE),$DEVICE_BITCODE_BACKUP2_FILE))));

# List of runtime bitcode files, add in this variable a new bitcode
# file when needed.
# Most entries live under $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR and several
# pick a _ccNN variant from COMPUTECAP. The list is used by cllvmlink (as
# llvm-link inputs) and by cpgnvvm (as extra_llfiles). Order matters: see
# the inline comments, in particular $DEVICE_BITCODE must stay last.
# NEEDCURAND, NEEDNVSHMEM and PFY are not defined in this file.
variable DEVICE_BITCODE_FILES is default(
      $if($DEVICE_MATHUNIFORM,$PGICUDAROOT/lib/libnvgpumath.bc)
      $if($NEEDCURAND,$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$(PFY)_curand_runtime.bc)
      $if($NEEDNVSHMEM,$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$(PFY)_nvshmem_runtime.bc)
      $if($expr($COMPUTECAP>=60),$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$(PFY)_cuda_real2_runtime.bc)
      $if($expr($COMPUTECAP>=70),$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$(PFY)_cuda_wmma_runtime$if($expr($COMPUTECAP>=80),_cc80).bc)
      $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$(PFY)_cuda_builtin_intrinsics_runtime$if($expr($COMPUTECAP>=80),_cc80).bc
      # NVOMP should be after `nvhpc_omp_runtime.bc` to make sure it
      # can resolve any references to the OpenMP RT
      $if($OMPTARGETOFFLOAD,$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$(PFY)_omp_runtime.bc)
      $if($USEOMPGPUCG,$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/nvomp_dev$(USEOMPGPUCGNVCC)$(USEOMPGPUCGDBG).bc)
      $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$(PFY)_utils_runtime$if($expr($COMPUTECAP>=90),_cc90).bc
      $ifn($index($TARGET,win64,win64-llvm,win64edg-llvm),$PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$NVHPCCUDACPPBUILTINS)
      $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$NVHPCRUNTIME
      $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$NVHPCRUNTIMEBUILTINS
      # If there is no available LLVM BC part of the F18 runtime,
      # the PTX/CUBIN F18 runtime will be linked at the link stage
      # to resolve all the references.
      $if($land($USEF18CUDART,$isdir($F18CUDAROOT),$equal($NVVMBC,nvvm70)),$F18CUDAROOT/$NVVMBC/$CUDA_MAJOR/$F18CUDABCRUNTIME)
      # Now that we are using the `nvvmLazyAddModuleToProgram` to add
      # our bitcode files, the `libdevice.10.bc` needs to be added
      # last, because when using the "lazy approach", nvvm need to
      # have seen all the requirements of functions in current module.
      $DEVICE_BITCODE
);

# GCC version used to pick the C++ standard in prepro90 / nvvm; forced to 0
# on the Windows targets. GCCVERSION is not defined in this file.
variable NVGNUVERSION is default($if($index($TARGET,win32,win64,win64-llvm,win64edg-llvm),0,$GCCVERSION));

# When set, libnvgpumath.bc is added to DEVICE_BITCODE_FILES.
variable DEVICE_MATHUNIFORM is default(0);

# Device Link Time Optimization
# NEEDCUDALTO is set by -dlto, -llto and -lto; LINKCUDALTO only by -llto.
variable NEEDCUDALTO is default(0);
variable LINKCUDALTO is default(0);
variable LTOFLAGS is default();

# Pass maxrregcount to nvlink
# (cnvlink only adds it inside its NEEDCUDALTO argument group).
variable MAXRREGCOUNT is default(0);

# Set for VC 17; uses PGI '14' libraries
variable PGC_OT_REL is
  hide default(14);

# FPIFP
# Inputs and outputs of the cgenfpifpidx command; also consumed by cnvlink,
# import and the -sm switch.
variable NEEDFPIFP is default(0);
variable FPIFPFPRELINKOBJFILE is default();
variable FPIFPFINDEXFILE is default();

# USEFMA maps to -fmad/-fma; NOFASTMATH maps to -prec_div/-prec_sqrt and,
# when zero, makes the nvvm command add -fast-math.
variable USEFMA is default(1);
variable NOFASTMATH is default(1);
# USELLC makes the Build phase start with cstartllc (the llvm-as / llc chain).
variable USELLC is default(0);
switch -usellc is
	help(Using alternative PTX generator)
	set(USELLC=1);

# -dryrun also keeps temporaries, so intermediate files get input-derived names.
switch -dryrun is
	help(Display commands that would be executed)
	set(DRYRUN=1) set(KEEPTEMP=1);

# Redirect target used by prepro90 (stderr) and compreg (stdout): "nul" on
# the Windows targets, empty elsewhere. -showerr clears it.
variable WINNUL is
	default($if($index($TARGET,win64,win64-llvm,win64edg-llvm),nul));
switch -showerr is hide
	set(WINNUL=);

switch -keeptemp is
    help(Keep intermediate temporary files)
    set(KEEPTEMP=1);

# Sets USEGPUBC=1. With USENVVM also set, the Build phase then starts with
# cnvvmas, which converts the .gpu input to .bc before cpgnvvm runs.
switch -usegpubc is
    help(Enable conversion from .gpu to .bc using llvm-as before passing it to libNVVM)
    set(USEGPUBC=1);

# USEOLDNVVMD makes cpgnvvm emit -gpubc and wrap NVVMFLAGS in -flags/-endflags.
switch -useoldnvvmd is
    help(Switch back to using old nvvmd. It is still the default libNVVM driver for Windows)
    set(USEOLDNVVMD=1);

# DEBUGLIBNVVM switches cpgnvvm from the tpgnvvm tool to tnvvm (cicc) and
# makes the Build phase go through cllvmlink first.
switch -debuglibnvvm is
    help(Show GPU toolchain commands using cicc instead of using nvnvvmd and libNVVM)
    set(DEBUGLIBNVVM=1);

# KEEPGPUBC gives the .bc intermediates input-derived names instead of temp files.
switch -keepgpubc is
    help(Keep GPU file in bitcode format (.bc))
    set(KEEPGPUBC=1);

# USENODEFAULTCUDA enables the NVCOMPILER_*_PATH overrides for the ptxas,
# fatbinary and nvlink tools and for the NVVM library / libdevice lookup.
switch -nodefaultcuda is
    help(Use component from non-standard CUDA Toolkit)
    set(USENODEFAULTCUDA=1);

# Hidden Windows SDK selectors. Each sets the SDK roots (PSDK, WSDK), the
# MSC_VER value and the OT-specific include directory appended to PGICUDAINC.
# NOTE(review): both switches set MSC_VER=1700 — confirm that is intended for -ot14.
switch -ot12 is  hide
        helpgroup(target)
        help(Open Tools version 12 )
        set(PSDK="C:/Program Files/PGI/Microsoft Open Tools 12")
        set(WSDK="C:/Program Files (x86)/Windows Kits/8.1")
        set(MSC_VER=1700)
        set(WSDK_VERSION=)
        set(PGI_OT_VER=__PGI_TOOLS12)
        set(PGICUDAINC_OT=$PGICUDAROOT/include_acc/OT_12);
# Unlike -ot12, -ot14 requires WSDK_VERSION to be set already and raises an
# error when it is empty.
switch -ot14 is  hide
        helpgroup(target)
        help(Open Tools version 14)
        set(PSDK="C:/Program Files/PGI/Microsoft Open Tools 14")
        set(WSDK="C:/Program Files (x86)/Windows Kits/10")
        set(MSC_VER=1700)
        #WSDK_VERSION must be set by makelocalrc . This is just a guess
        #set(WSDK_VERSION=10.0.10586.0)
        error($ifn($WSDK_VERSION," ot14 switch: unknown Windows Kit version: run makelocalrc"))
        set(PGI_OT_VER=__PGI_TOOLS14)
        set(PGICUDAINC_OT=$PGICUDAROOT/include_acc/OT_14);

# Fills PREINC; prepro90 passes each entry as "-include <file>".
switch -preinclude is
    help(Preinclude CUDA header files)
    set(PREINC=cuda_runtime.h $(PFY)_cuda_runtime.h);

# CUPREPRO decides whether the default Build pipeline starts at prepro90 (1)
# or goes straight to the nvvm command (0).
switch -prepro is
    help(Run preprocessor)
    set(CUPREPRO=1);
switch -noprepro is
    help(Do not run preprocessor)
    set(CUPREPRO=0);
# INFILE affects fatbin: it selects a named .fatbin output and suppresses the
# follow-on import step.
variable INFILE is default(0);
switch -infile is
    help(Run from a file)
    set(INFILE=1);

# Real architecture name (sm_NN), used for ptxas -arch and nvlink --arch.
variable SMCAP is
	default(sm_$COMPUTECAP);

# Virtual architecture name (compute_NN), used for cicc / libNVVM -arch and
# for nvlink --arch when PTXLINK is set.
variable COMPCAP is
	default(compute_$CCOMPUTECAP);

# Optional explicit output names; when empty the commands derive a name from
# the input or use a temp file.
variable CUBINFILE is default();
variable CUPTXFILE is default();

# Extra arguments inserted into the nvvm (cicc) command line.
variable NVVMARGS is default();
# libNVVM driver used by cpgnvvm; TNVVM and DRIVERDIR are not defined in this file.
tool tpgnvvm is
	program($TNVVM) directory($DRIVERDIR);
# cicc from the NVVM bin directory; used by the nvvm command and by cpgnvvm
# when DEBUGLIBNVVM is set.
tool tnvvm is
	program(cicc) directory($NVVMDIR);
# ptxas. With USENODEFAULTCUDA the search list starts with
# NVCOMPILER_PTXAS_PATH (if set) and PATH; the last entry is the NVVM dev
# tools directory when USENVVMDEV is set, otherwise $USECUDAROOT/bin.
tool tptx is
	program(ptxas) directory($if($USENODEFAULTCUDA,$if($PTXAS_PATH_ENV,$PTXAS_PATH_ENV) $PATH) $if($USENVVMDEV,$NVVMDEVBASEDIRTOOLS,$USECUDAROOT/bin));
# LLVMAS from the environment, when set, overrides the llvm-as directory.
variable LLVMAS is environment(LLVMAS);
variable LLVMVER is default(3.1);
variable LLCDIR is default($PGICUDAROOT/share/llvm/bin);
# llvm-as for the USELLC and USELL pipelines; the LLVMAS environment variable
# overrides the default LLCDIR location.
tool llvmasllc is program(llvm-as) directory($if($LLVMAS,$LLVMAS,$LLCDIR));
tool llvmas is program(llvm-as) directory($if($LLVMAS,$LLVMAS,$LLCDIR));

# nvvmcc from $USECUDAROOT/libnvvm (used by cnvvmcc).
tool nvvmcc is program(nvvmcc) directory($NVVMCCDIR);

# llc from the driver's llvmptx directory (used by cllc).
tool tllc is program(llc) directory($CDRIVERDIR/llvmptx);

# llvm-link / llvm-as matching the selected NVVM flavour ($NVVM).
tool tllvmlink is program(llvm-link) directory($CDRIVERDIR/tools/$NVVM);
tool tllvmas is program(llvm-as) directory($CDRIVERDIR/tools/$NVVM);

# llc / llvm-link / opt from LLCDIR, used by the c*38 commands.
tool tllc38 is program(llc) directory($LLCDIR);
tool tllvmlink38 is program(llvm-link) directory($LLCDIR);
tool tllvmopt38 is program(opt) directory($LLCDIR);
#tool techo is
#	program($ECHOBIN)
#	directory($PATH);

# fatbinary and nvlink use the same directory search as ptxas (tptx), each
# with its own NVCOMPILER_*_PATH override under USENODEFAULTCUDA.
tool tfatbin is
	program(fatbinary) directory($if($USENODEFAULTCUDA,$if($FATBINARY_PATH_ENV,$FATBINARY_PATH_ENV) $PATH) $if($USENVVMDEV,$NVVMDEVBASEDIRTOOLS,$USECUDAROOT/bin));
tool tnvlink is
	program(nvlink) directory($if($USENODEFAULTCUDA,$if($NVLINK_PATH_ENV,$NVLINK_PATH_ENV) $PATH) $if($USENVVMDEV,$NVVMDEVBASEDIRTOOLS,$USECUDAROOT/bin));
# Fat-binary importer used by the import command; TIMPORT is not defined in this file.
tool timport is
	program($TIMPORT) directory($DRIVERDIR);
# Assembler used by fatas: searched in PATH, then LLCDIR, then $CDRIVERDIR/tools.
tool as is
	program($AS) directory($PATH $LLCDIR $CDRIVERDIR/tools);
variable AS is
	default(as);
# fatas appends -filetype=obj to this for the win64-llvm / win64edg-llvm targets.
variable EXTRAASARGS is
	default();

# Tool to generate FPIFP index file
tool tgenfpifpidx is
	program(genfpifpidx) directory($DRIVERDIR);

# File suffix "ptx"; the ptx command is bound to it via suffix(ptx).
suffix ptx is
	string(ptx);

# Tools for converting `.gpu` file (text LLVM IR) to `.bc` (binary LLVM IR)
# variable USEGPUBC is default($if($expr($CUDAXXYY>=12080),1,0));
variable USEGPUBC is default(0);
variable KEEPGPUBC is default(0);
variable NVVMAS is default(llvm-as);
# The old nvvmd driver is the default on the Windows targets only.
variable USEOLDNVVMD is default($if($index($TARGET,win64,win64-llvm,win64edg-llvm),1,0));
variable DEBUGLIBNVVM is default(0);
# Set by cllvmlink to the linked runtime bitcode; read by cllvmlinkuser and cpgnvvm.
variable OUTRUNTIMEBC is default();
tool tnvvmas is program($NVVMAS) directory($DRIVERDIR/$NVVMCGVER);

variable USENODEFAULTCUDA is default(0);

# Catch-all suffix, referenced as suffix(other) by several commands.
suffix other is
	default;

# NOTE(review): INLINE is set by -inline / -noinline but not read in the
# visible part of this file.
variable INLINE is default(1);
# 0 means "no limit": ptx only adds -maxrregcount when MAXRCOUNT is not 0.
variable MAXRCOUNT is default(0);
# -1 means "unset": cpgnvvm only adds -split-compile when the value is not -1.
variable MAXNVVMTHREADS is default(-1);

# Forwarded to ptxas as -maxrregcount=<n> by the ptx command.
switch -regs=n is
	help(Set maximum register count to use)
	set(MAXRCOUNT=$n);

# Forwarded to the libNVVM driver as -split-compile=<n> by cpgnvvm.
switch -split-compile=n is
	help(Set maximum thread count to use during libNVVM compilation)
	set(MAXNVVMTHREADS=$n);

switch -inline is set(INLINE=1);
switch -noinline is set(INLINE=0);
# Accepted but has no actions attached.
switch -dp is hide ;

# Last entry of SYSCAP (defined outside this file, see the acc<target>rc note above).
variable HIGHESTSUPPORTEDSYSCAP is default($last($SYSCAP));
# Runtime bitcode file names, each with a suffix chosen from COMPUTECAP:
#   NVHPCRUNTIME:          none below 60, _cc60 for 60..69, _cc70 for 70 and up
#   NVHPCCUDACPPBUILTINS:  _cc70 for 70 and up
#   NVHPCRUNTIMEBUILTINS:  _cc<COMPUTECAP>, clamped to HIGHESTSUPPORTEDSYSCAP
#   F18CUDABCRUNTIME:      _cc50 / _cc60 / _cc70 / _cc80 / _cc90 by range
variable NVHPCRUNTIME is default($(PFY)_cuda_runtime$if($expr(($COMPUTECAP>=60)&&($COMPUTECAP<70)),_cc60, $if($expr($COMPUTECAP>=70), _cc70)).bc);
variable NVHPCCUDACPPBUILTINS is default($(PFY)_cuda_cpp_builtins$if($expr($COMPUTECAP>=70),_cc70).bc);
variable NVHPCRUNTIMEBUILTINS is default($(PFY)_cuda_runtime_builtins_cc$if($expr($COMPUTECAP>$HIGHESTSUPPORTEDSYSCAP),$HIGHESTSUPPORTEDSYSCAP,$COMPUTECAP).bc);
variable F18CUDABCRUNTIME is default(libFortranRuntime$if($expr($COMPUTECAP<60), _cc50, $if($expr($COMPUTECAP<70), _cc60, $if($expr($COMPUTECAP<80), _cc70, $if($expr($COMPUTECAP<90), _cc80, _cc90)))).bc);

# RELOC adds --device-c (cicc, fatbinary) and --compile-only (ptxas).
variable RELOC is default(0);
variable DOLINK is default(0);

switch -reloc is
	help(Generate relocatable code)
	set(RELOC=1);

# Sets COMPUTECAP to n; CCOMPUTECAP is n when n < 70 and 60 otherwise.
switch -rcomputecap=n is
	set(COMPUTECAP=$n)
	set(CCOMPUTECAP=$if($expr($n<70),$n,60));

switch -noflags is
	help("Don't pass -opt -arch -ftz -prec-sqrt -prec-div -fma to nvvm")
	set(NVVMNOFLAGS=1);

# Only COMPUTECAP is assigned here; CCOMPUTECAP follows through its
# default($COMPUTECAP) and is therefore not capped at 60.
switch -computecap=n is
	set(COMPUTECAP=$n);	# sets CCOMPUTECAP as well

# Same assignments as -rcomputecap=n.
switch -cc$n is
	set(COMPUTECAP=$n)
	set(CCOMPUTECAP=$if($expr($n<70),$n,60));

# LTOFLAGS is appended to the libNVVM options by cpgnvvm when NEEDCUDALTO is set.
switch -dlto $file is
    set(LTOFLAGS=-gen-lto-and-llc -olto $file)
    set(NEEDCUDALTO=1);

# Link-time LTO: LINKCUDALTO changes the fatbinary --ident and adds -link.
switch -llto is
    set(NEEDCUDALTO=1)
    set(LINKCUDALTO=1);

# NORDC also removes the __pgi_fatbin_start/end slots from the import command.
# F901ARGS is not used in this file.
switch -nordc is
    append(F901ARGS=-x 68 0x200)   
    set(NORDC=1);

switch -maxrregcount n is
    set(MAXRREGCOUNT=$n);

# CCOMPUTECAP with a trailing 0 (e.g. 35 -> 350); prepro90 passes it as __CUDA_ARCH__.
variable CUDAARCH is default($(CCOMPUTECAP)0);

variable CFEDEBUG is default();
# Extra arguments appended at the end of the fatbinary command line.
variable FATDEBUG is default();

# Preprocess a CUDA source and hand the result to the nvvm command.
#  - Output ($outp1) is <input basename>.ii with KEEPTEMP, else a temp .ii file.
#  - On the Windows targets the output is captured from stdout; elsewhere it
#    is requested with "-o $outp1".
#  - Include paths come from INC, STDINC and the PSDK/WSDK directories; macros
#    from STDDEF, HOSTDEF, CUSTDDEF1 (-D) and CUSTUDEF1 (-U).
#  - The C++ standard follows NVGNUVERSION: >=110000 c++17, 60000..109999
#    c++14, below 60000 c++11.
# The cuprepro tool and CUPREPRO1ARGS, PGIM, HOSTDEF, WSDKMORE and CUDAVER are
# not defined in this file.
command prepro90 is
	help(Include preprocessor for CUDA >= 9.0)
	tool(cuprepro)
	suffix(other)
	set(outp1=$if($KEEPTEMP,$basename($input).ii,$tempfile(ii)))
	stdout($if($index($TARGET,win64,win64-llvm,win64edg-llvm),$outp1))
	stderr($WINNUL)
	arguments($CUPREPRO1ARGS $input
		-D__CUDA_ARCH__=$CUDAARCH -D__CUDA_VER__=$CUDAXXYY -D$PGIM
		$foreach(i,$INC, -I$i)
		$foreach(i,$STDINC $if($PSDK,$PSDK/include) $if($PGC_OT_REL,$WSDK/Include/$WSDKMORE/shared $WSDK/Include/$WSDKMORE/ucrt $WSDK/Include/$WSDKMORE/um, $if($WSDK,$WSDK/Include/shared $WSDK/Include/um,$if($PSDK,$PSDK/PlatformSDK/include))), -I$i)
		$foreach(d,$STDDEF, -D$d)
		$foreach(d,$HOSTDEF, -D$d)
		$foreach(d,$CUSTDDEF1, -D$d)
		$foreach(u,$CUSTUDEF1, -U$u)
		$foreach(i,$PREINC, -include $i)
		-DV$CUDAVER
		$DEF
		$if($expr($NVGNUVERSION>=110000),--std=c++17,
		$if($land($expr($NVGNUVERSION >= 60000),$expr($NVGNUVERSION < 110000)),--std=c++14,
		$if($expr($NVGNUVERSION < 60000),--std=c++11)))
		$ifn($index($TARGET,win64,win64-llvm,win64edg-llvm),-o $outp1)
		)
	execute(
	    input($outp1)
	    command(nvvm)
	);

# Entry point of the USELLC pipeline: assemble the input with llvm-as into
# $bcfile (<input basename>.bc with KEEPTEMP, else a temp file), then continue
# with cpreplibs. USEPGILL is set here but only referenced in the trailing comment.
command cstartllc is
	help(LLVM assembler)
	tool(llvmasllc)
	suffix(other)
	set(USEPGILL=1)
	set(bcfile=$if($KEEPTEMP,$basename($input).bc,$tempfile(bc)))
	arguments(-o=$bcfile $input)
	execute(
	    input($bcfile)
	    command(cpreplibs) #$if($USEPGILL,cpreplibs,cllvmlink38))
	);

# Pass-through step: declares the llvmas tool but no arguments, and forwards
# $bcfile to cpreplibs2.
# NOTE(review): what the tool is run with when no arguments() is given is not
# visible here.
command cpreplibs is
	help(Prepping PGI LLVM libs)
	tool(llvmas)
	execute(
		input($bcfile)
		command(cpreplibs2)
	);

# Environment toggle. When set, the runtime is linked and internalized on its
# own (cprelink -> cpreinternalize) before being linked with the user code;
# when unset, cllvmlink38 links everything and cllvmopt38 internalizes.
variable DISABLE_INTERNALIZE is environment(DISABLE_INTERNALIZE);

# Assemble the textual CUDA runtime (<PFY>_cuda_runtime.ll) into a temp
# bitcode file ($nvhpc_cuda) for the later link steps.
command cpreplibs2 is
	help(Prepping PGI LLVM libs 2)
	tool(llvmas)
	set(pgill=$PGICUDAROOT/lib/llc/$(PFY)_cuda_runtime.ll)
	set(nvhpc_cuda = $tempfile(bc))
	arguments($pgill -o $nvhpc_cuda)
	execute(
		input($bcfile)
		command($if($DISABLE_INTERNALIZE,cprelink,cllvmlink38))
	);

# Link libdevice with the runtime bitcode into prelinked.bc.
# NOTE(review): prelinked.bc is a fixed relative name, not a temp file.
command cprelink is
	help(Linking before internalize)
	tool(tllvmlink38)
	arguments($DEVICE_BITCODE $nvhpc_cuda -o prelinked.bc)
	execute(
		input($bcfile)
		command(cpreinternalize)
	);

# Run opt's internalize and always-inline passes on prelinked.bc, writing
# internalized.bc (also a fixed relative name).
command cpreinternalize is
	help(Internalizing CUDA runtime)
	tool(tllvmopt38)
	arguments(prelinked.bc -passes=internalize,always-inline -o internalized.bc)
	execute(
		input($bcfile)
		command(cllvmlink38)
	);

# Link the user bitcode with either internalized.bc (DISABLE_INTERNALIZE set)
# or libdevice plus the runtime bitcode, producing $bcfile.linked.bc.
command cllvmlink38 is
	help(LLVM linker)
	tool(tllvmlink38)
	set(linkedbcfile=$bcfile.linked.bc)
	arguments($bcfile $if($DISABLE_INTERNALIZE,internalized.bc,$DEVICE_BITCODE $nvhpc_cuda) -o $linkedbcfile)
	execute(
	    input($bcfile)
	    command(cllvmopt38)
	);

# Appended verbatim after "kernel" in -internalize-public-api-list, so a
# non-empty value presumably has to start with its own separator.
variable INTERNALIZE_EXCLUDES is environment(INTERNALIZE_EXCLUDES);

# Run opt on the linked module, writing $bcfile.opt.bc. The internalize /
# always-inline / nvvm-reflect passes are only requested when
# DISABLE_INTERNALIZE is not set.
command cllvmopt38 is
	help(LLVM opt)
	tool(tllvmopt38)
	set(linkedbcfile=$bcfile.linked.bc)
	set(optbcfile=$bcfile.opt.bc)
	arguments($if($DISABLE_INTERNALIZE,, "-passes=internalize,always-inline,function(nvvm-reflect)" -internalize-public-api-list=kernel$INTERNALIZE_EXCLUDES) $linkedbcfile -o $optbcfile)
	execute(
	    input($bcfile)
	    command(cllc38)
	);


# Lower the optimized bitcode to PTX with llc (nvptx64, +ptx42, -O3) for
# sm_$CCOMPUTECAP, then hand the PTX to the ptx command. Output is CUPTXFILE
# if set, else <input basename>.ptx with KEEPTEMP, else a temp file.
command cllc38 is
	help(LLC LLVM to PTX)
	tool(tllc38)
	set(optbcfile=$bcfile.opt.bc)
	set(out3=$if($CUPTXFILE,$CUPTXFILE,$if($KEEPTEMP,$basename($input).ptx,$tempfile(ptx))))
	arguments(-mcpu=sm_$CCOMPUTECAP $optbcfile -O3 -o $out3 -march=nvptx64 -mattr=+ptx42)
	execute(
	    input($out3)
	    command(ptx)
	);

# Compile with cicc (tool tnvvm) to PTX, then continue with the ptx command.
#  - Output is CUPTXFILE if set, else <input basename>.ptx with KEEPTEMP,
#    else a temp file.
#  - NOFASTMATH drives -prec_div / -prec_sqrt; when it is 0, -fast-math is added.
#  - -w is added unless CUDAVERSION is 7.5 or 8.0.
#  - libdevice is passed with -nvvmir-library when DEVICE_BITCODE is non-empty.
#  - C++ standard: c++14 on the Windows targets, otherwise chosen from
#    NVGNUVERSION with the same thresholds as prepro90.
# CUWIDTH is not defined in this file.
command nvvm is
	help(NVidia GPU compiler)
	tool(tnvvm)
	suffix(other)
	set(out3=$if($CUPTXFILE,$CUPTXFILE,$if($KEEPTEMP,$basename($input).ptx,$tempfile(ptx))))
	arguments(
	-arch $COMPCAP -m$CUWIDTH -ftz=$FTZ -prec_div=$NOFASTMATH -prec_sqrt=$NOFASTMATH -fmad=$USEFMA
	$ifn($NOFASTMATH,-fast-math)
	$if($RELOC,--device-c)
	$NVVMARGS -O$CUOPT $input -o $out3
	$ifn($index($CUDAVERSION,7.5,8.0),-w)
	$if($DEVICE_BITCODE,-nvvmir-library $DEVICE_BITCODE)
	$if($expr($NVGNUVERSION>=100000),--gnu_version=$NVGNUVERSION)
	$if($index($TARGET,win64,win64-llvm,win64edg-llvm),--c++14,
	$if($expr($NVGNUVERSION>=110000),--c++17,
	$if($land($expr($NVGNUVERSION >= 60000),$expr($NVGNUVERSION < 110000)),--c++14,
	$if($expr($NVGNUVERSION < 60000),--c++11))))
    )
	execute(
	    input($out3)
	    command(ptx)
	);


# Entry point of the USELL pipeline: llvm-as the input into $bcfile, then
# continue with cnvvmcc.
command cllvmas is
	help(LLVM assembler)
	tool(llvmas)
	suffix(other)
	set(bcfile=$if($KEEPTEMP,$basename($input).bc,$tempfile(bc)))
	arguments(-o=$bcfile $input)
	execute(
	    input($bcfile)
	    command(cnvvmcc)
	);

# Compile bitcode to PTX with nvvmcc, then continue with the ptx command.
command cnvvmcc is
	help(NVidia LLVM GPU compiler)
	tool(nvvmcc)
	set(out3=$if($CUPTXFILE,$CUPTXFILE,$if($KEEPTEMP,$basename($input).ptx,$tempfile(ptx))))
	arguments(-m$CUWIDTH $input -o $out3)
	execute(
	    input($out3)
	    command(ptx)
	);

# Compile to PTX with the llvmptx llc (nvptx64, ptx30) for $SMCAP.
# NOTE(review): no phase or command in the visible part of this file invokes cllc.
command cllc is
	help(LLVM-PTX compiler)
	tool(tllc)
	set(out3=$if($CUPTXFILE,$CUPTXFILE,$if($KEEPTEMP,$basename($input).ptx,$tempfile(ptx))))
	arguments(-march=nvptx64 -mattr=ptx30 -mcpu=$SMCAP -o=$out3 $input)
	execute(
	    input($out3)
	    command(ptx)
	);

# Debug variant: pre-link libdevice, the NVHPC runtime bitcode and the nvshmem
# runtime with the input using llvm-link (-S, textual .ll output), then run
# cpgnvvm on the result. Sets LLFILEPRELINKED=1 so cpgnvvm does not add the
# runtime bitcode files again, and forces NVVMOPTLEVEL=0.
# NOTE(review): the help text says "LLVM-PTX compiler" although the tool is
# llvm-link; it looks copied from cllc.
command cpgnvvmdbg is
	help(LLVM-PTX compiler)
	tool(tllvmlink)
	set(out3=$if($KEEPTEMP,$basename($input).ll,$tempfile(ll)))
	set(LLFILEPRELINKED=1)
	set(NVVMOPTLEVEL=0)
	arguments($DEVICE_BITCODE $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$NVHPCRUNTIME $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$NVHPCRUNTIMEBUILTINS $PGICUDAROOT/lib/$NVVMBC/$CUDA_MAJOR/$(PFY)_nvshmem_runtime.bc $input -S -o $out3)
	execute(
	    input($out3)
	    command(cpgnvvm)
	);

# Convert the textual .gpu input to bitcode with the tnvvmas tool, then run
# cpgnvvm on it. The .bc keeps an input-derived name only with KEEPGPUBC
# (KEEPTEMP is not consulted here).
command cnvvmas is
	help(Convert text based (.gpu) to binary (.bc) LLVM IR )
	tool(tnvvmas)
	set(out3=$if($KEEPGPUBC,$basename($input).bc,$tempfile(bc)))
	arguments($input -o $out3)
	execute(
	    input($out3)
	    command(cpgnvvm)
	);

# First step of the DEBUGLIBNVVM pipeline: link every file in
# DEVICE_BITCODE_FILES into one runtime bitcode file (OUTRUNTIMEBC, named
# nvhpc_runtime.bc with KEEPGPUBC, else a temp file). The user input is not
# part of this link; it is forwarded unchanged to cllvmasuser.
command cllvmlink is
    help(LLVM Linker)
    tool(tllvmlink)
    set(OUTRUNTIMEBC=$if($KEEPGPUBC,nvhpc_runtime.bc,$tempfile(bc)))
    arguments(
        -o $OUTRUNTIMEBC
        $DEVICE_BITCODE_FILES
    )
    execute(
        input($input)
    	command(cllvmasuser)
    );

# Link the user input with the runtime bitcode produced by cllvmlink
# (OUTRUNTIMEBC), then run cpgnvvm on the result. Prints the debug start
# banner when DEBUGLIBNVVM is set.
# NOTE(review): nothing in the visible part of this file invokes this command;
# cllvmlink chains to cllvmasuser instead.
command cllvmlinkuser is
    help(LLVM Linker)
    tool(tllvmlink)
    set(outbc=$if($KEEPGPUBC,$basename($first($input)).bc,$tempfile(bc)))
    info($if($DEBUGLIBNVVM,"########## Debug commands for libNVVM - Start ##########"))
    arguments(
        -o $outbc
        $input
        $OUTRUNTIMEBC
    )
    execute(
        input($outbc)
        command(cpgnvvm)
    );
    
# Assemble the user input with llvm-as (tool tllvmas) into $outbc, then run
# cpgnvvm on it. $outbc is <first input basename>.bc with KEEPGPUBC, else a
# temp file. Prints the debug start banner when DEBUGLIBNVVM is set; the
# matching end banner is printed by the ptx command.
# The help text previously read "LLVM Linker", copied from cllvmlinkuser.
command cllvmasuser is
    help(LLVM assembler)
    tool(tllvmas)
    set(outbc=$if($KEEPGPUBC,$basename($first($input)).bc,$tempfile(bc)))
    info($if($DEBUGLIBNVVM,"########## Debug commands for libNVVM - Start ##########"))
    arguments(
        -o $outbc
        $input
    )
    execute(
        input($outbc)
        command(cpgnvvm)
    );

# Compile LLVM IR to PTX, then continue with the ptx command.
# Two modes, selected by DEBUGLIBNVVM:
#  - unset: run the libNVVM driver (tpgnvvm) with "-ptx <out>", the runtime
#    bitcode files (unless LLFILEPRELINKED), optional -nvvmverify and NVVMFLAGS.
#  - set:   run cicc (tnvvm) with "-libnvvm ... -o <out>" and the pre-linked
#    runtime passed via -nvvmir-library $OUTRUNTIMEBC.
# The -opt/-arch/-ftz/-prec-*/-fma group is emitted in both modes unless
# NVVMNOFLAGS is set. -nvvm-version is only added when USENVVMNEXT or
# USENVVMDEV is set and NVVMVEROPT is non-empty.
command cpgnvvm is
	help(NVVM LLVM-PTX compiler)
	suffix(other)
	tool($ifn($DEBUGLIBNVVM,tpgnvvm,tnvvm))
	set(out3=$if($CUPTXFILE,$CUPTXFILE,$if($KEEPTEMP,$basename($input).ptx,$tempfile(ptx))))
	set(extra_llfiles=$DEVICE_BITCODE_FILES)
	arguments(
        $if($DEBUGLIBNVVM,-libnvvm)
        $if($VERBOSE,-v)
        $if($NVVMVERIFY,$ifn($DEBUGLIBNVVM,-nvvmverify,))
        $ifn($DEBUGLIBNVVM,-ptx $out3,-o $out3)
	    $input
	    $if($DEBUGLIBNVVM,-nvvmir-library $OUTRUNTIMEBC)
	    # The `-gpubc` flag separates (used only with old nvvmd tool)
	    # the bitcode files from the user code so that we can process
	    # those with the `nvvmLazyAddModuleToProgram` function from
	    # libNVVM.
	    $ifn($DEBUGLIBNVVM,$ifn($LLFILEPRELINKED,$if($USEOLDNVVMD,-gpubc) $extra_llfiles))
	    $ifn($NVVMNOFLAGS,-opt=$NVVMOPTLEVEL $NVVMDEBUG -arch=$COMPCAP -ftz=$FTZ -prec-div=$NOFASTMATH -prec-sqrt=$NOFASTMATH -fma=$USEFMA $NVVMLINEINFO $if($notequal($MAXNVVMTHREADS,-1),-split-compile=$MAXNVVMTHREADS) $if($land($lor($USENVVMNEXT,$USENVVMDEV),$notequal($NVVMVEROPT,)),-nvvm-version=$NVVMVEROPT) $if($NEEDCUDALTO, $LTOFLAGS))
	    $ifn($DEBUGLIBNVVM,$ifn($equal($NVVMFLAGS,),$if($USEOLDNVVMD,-flags) $NVVMFLAGS $if($USEOLDNVVMD,-endflags)))
	)
	execute(
	    input($out3)
	    command(ptx)
	);

# Assemble PTX to a cubin with ptxas; the final step of every Build pipeline.
#  - Output is CUBINFILE if set, else <basename of $file>.cubin.
#  - stderr goes to $PTXINFO.
#  - -O$PTXOPT is dropped when DEBUG2 (which adds -g) is set.
#  - -m$CUWIDTH is dropped when NOPTXCU is set.
# PTXASARGS is not defined in this file. The trailing info() prints the end
# banner matching the start banner of the DEBUGLIBNVVM commands.
command ptx is
	help(PTX Assembler)
	tool(tptx)
	suffix(ptx)
	set(out4=$if($CUBINFILE,$CUBINFILE,$basename($file).cubin))
	stderr($PTXINFO)
	arguments(-arch=$SMCAP
		-fmad $if($USEFMA,true,false)
		$if($CACHEL12,-dlcm=ca,$if($CACHEL2,-dlcm=cg))
		$ifn($NOPTXCU,-m$CUWIDTH)
		$if($DEBUG1,--dont-merge-basicblocks)
		$if($DEBUG2,-g)
		$if($DEBUG3,--return-at-end)
		$ifn($DEBUG2,-O$PTXOPT)
		$if($LINEINFO,--generate-line-info)
		$if($notequal($MAXRCOUNT,0),-maxrregcount=$MAXRCOUNT) -o $out4
		$if($RELOC,--compile-only)
		$if($DOPTXINFO,-v) $input
		$PTXASARGS
		)
    info($if($DEBUGLIBNVVM,"########## Debug commands for libNVVM - End ##########"));

# Fat binary parameters, all empty by default.
#   FATNAME - passed as --ident (set by -fat= / -fatobj=)
#   FATARGS - image arguments accumulated by -sm / -compute / -lto
#   FATKEY  - set by -key=
#   FATOBJ  - set by -fatobj=; makes fatbin continue into the import command
variable FATNAME is help(Identifier for the fat binary) default();

variable FATARGS is help(Fat binary arguments) default();

variable FATKEY is help(Fat binary key) default();

variable FATOBJ is help(Build object file containing fat binary) default();

# Build the fat binary with the fatbinary tool.
# Output name:
#  - FATOBJ set: a named .fatbin (<CUBINFILE basename>.fatbin or nvfat.fatbin)
#    when KEEPTEMP, INFILE or GPUFILE is set, else a temp file.
#  - FATOBJ unset: CUBINFILE if set, else a temp file.
# The --cmdline options mirror the ptxas debug flags implied by DEBUG/LINEINFO.
# Continues into the import command only when FATOBJ is set and INFILE is not.
# NOTE(review): this command uses $or/$and/$not while other blocks in this
# file use $lor/$land; confirm both spellings are supported.
command fatbin is
	help(Create fat binary)
	tool(tfatbin)
	set(out=$if($FATOBJ,$if($or($KEEPTEMP,$INFILE,$GPUFILE),$if($CUBINFILE,$basename($CUBINFILE).fatbin,nvfat.fatbin),$tempfile(fatbin)),$if($CUBINFILE,$CUBINFILE,$tempfile(fatbin))))
	arguments(
	    $if($RELOC,$if($expr($CUDAXY<101),--cuda) --device-c) -$CUWIDTH --create=$out
	    $if($LINKCUDALTO,--ident=\"lto\",--ident=$FATNAME) $if($or($and($DEBUG,$LINEINFO),$NEEDCUDALTO),--cmdline=\"--compile-only\" $if($LINKCUDALTO,-link)) $FATARGS
        $if($and($DEBUG,$LINEINFO),--cmdline=" -g --dont-merge-basicblocks --return-at-end --generate-line-info " -g,$if($DEBUG,--cmdline=" -g --dont-merge-basicblocks --return-at-end" -g,$if($LINEINFO,--cmdline=" --generate-line-info ")))
	$FATDEBUG)
	execute(
	    condition($and($FATOBJ,$not($INFILE)))
	    input($out)
	    command(import)
	);

# Accumulates one -cc<sm> per -sm switch; forwarded by the import command.
variable IMPARGS is default();

# Adds -dyninit to import and $FPICFLAG to compreg.
variable DYNINIT is default(0);

switch -dyninit is set(DYNINIT=1);

# Adds -gpufile to import and forces a named .fatbin output in fatbin.
variable GPUFILE is default(0);

switch -gpufile is set(GPUFILE=1);

# list of routine name suffixes (with a __pgi_uacc_set_ prefix)
# that should be called at .init time
variable ACCINITS is default();

switch -init=routine is
	append(ACCINITS=$routine);

# CUDALINK / UNIFIEDMEM are forwarded to import as -cudalink / -unifiedmem.
switch -cudalink is
	set(CUDALINK=1);

switch -unifiedmem is
	set(UNIFIEDMEM=1);

# Wrap the fat binary in an assembly file (section .nv_fatbin) using the
# timport tool, then assemble it with fatas.
#  - Output is <input>.s with KEEPTEMP ("import.s" when the input is empty),
#    else a temp .s file.
#  - With RTBUILD empty, the __NV_CUDA_LOC / __NV_CUDA_CAP variables, IMPARGS
#    and -linkinit are passed only when at least one -sm was given (smfiles);
#    otherwise the variable is named after RTBUILD.
#  - The __pgi_fatbin_start/end slots are omitted on the Windows targets and
#    with NORDC; slots 6-9 are added when NEEDFPIFP is set.
# NOTE(review): $IMPORTARGS is distinct from IMPARGS and is not defined in
# this file — confirm it is defined elsewhere and not a typo.
command import is
	help(Import fat binary to assembly file)
	tool(timport)
	set(in=$ifn($equal($input,),$input,import))
	set(out=$if($KEEPTEMP,$in.s,$tempfile(s)))
	arguments($out $input .nv_fatbin
		$IMPORTARGS
		-cudaver $CUDAXXYY
		$if($equal($RTBUILD,),
			$if($smfiles,-var __NV_CUDA_LOC -ccname __NV_CUDA_CAP $IMPARGS -linkinit),
			-var $RTBUILD)
		$if($DYNINIT,-dyninit) $if($GPUFILE,-gpufile)
		$ifn($index($TARGET,win64,win64-llvm,win64edg-llvm),$if($not($NORDC),-slot 4 __pgi_fatbin_start -slot 5 __pgi_fatbin_end))
		$if($NEEDFPIFP,-slot 6 __nvhpc_nvuft_start -slot 7 __nvhpc_nvuft_end -slot 8 __nvhpc_nvudt_start -slot 9 __nvhpc_nvudt_end)
		$foreach(init,$ACCINITS, -init __pgi_uacc_set_$init) $if($CUDALINK,-cudalink) $if($UNIFIEDMEM,-unifiedmem))
	execute(
	    input($out)
	    command(fatas)
	);

# COMPBIN is also the anchor for CUDAMATHDIR (three levels up).
variable COMPBIN is default($DRIVERDIR);
variable CCOMPDIR is		# set by -Y0
	help(Directory containing the C compiler)
	default($COMPBIN);

variable WINCONV is default(nvdd);	# dummy name
# The program name is only filled in on the Windows targets.
tool winconv is
	program($if($index($TARGET,win64,win64-llvm,win64edg-llvm),$WINCONV))
	directory($CCOMPDIR);

# Convert the fat object to PECOFF. The only reference to this command in the
# visible part of this file is the commented-out execute() in fatas.
command conv is
	help(Convert fat object to PECOFF)
	tool(winconv)
	set(out=$if($CUBINFILE,$CUBINFILE,$tempfile(o)))
	arguments($input $out -dcuda $USECUDAROOT -cuda$CUDAXXYY $if($VERBOSE,-v))
	;

# Assemble the .s file produced by import into an object file (CUBINFILE if
# set, else a temp .o). For the win64-llvm / win64edg-llvm targets
# -filetype=obj is appended to EXTRAASARGS first.
command fatas is
	help(Assemble fat binary to object file)
	tool(as)
	set(out=$if($CUBINFILE,$CUBINFILE,$tempfile(o)))
	append(EXTRAASARGS=$if($index($TARGET,win64-llvm,win64edg-llvm),-filetype=obj))
	# Order matters here on windows: $input MUST come before the flags.
	arguments(
	    $input $EXTRAASARGS -o $out
	)
	# TODO: do we need to convert to PECOFF on windows with the new llvm-mc?
#	execute(
#	    condition($index($TARGET,win64,win64-llvm,win64edg-llvm))
#	    input($out)
#	    command(conv)
#	)
	;

# -L options accumulated by the -L$libdir switch; passed to nvlink by cnvlink.
variable LDLIBS is default();

# Runs genfpifpidx on the FPIFP pre-link object to produce the index file;
# invoked by the Fpifp phase.
command cgenfpifpidx is
    help(Generate FPIFP index file expected by nvlink)
    tool(tgenfpifpidx)
    # Add `-debug` for verbose output and to generate a human-readable index file
    arguments(-i $FPIFPFPRELINKOBJFILE -o $FPIFPFINDEXFILE);

# Link device objects with nvlink.
#  - Output is CUBINFILE if set, else <first input basename>.cubin.
#  - --arch is $SMCAP, or $COMPCAP when PTXLINK is set.
#  - With NEEDCUDALTO: -lto, -nvvmpath, optional --maxrregcount, and
#    -L$CUDALIB -lcudadevrt are added.
#  - With DOREG: a registration file ($reg, REGFILE or <out>.reg.c next to the
#    output) is requested and then compiled by compreg.
#  - -uidx is added when NEEDFPIFP is set and the index file exists.
# NVLINKARGS is not defined in this file.
command cnvlink is
	help(Link device objects)
	tool(tnvlink)
	set(out=$if($CUBINFILE,$CUBINFILE,$basename($first($files)).cubin))
	set(reg=$if($DOREG,$if($REGFILE,$REGFILE,$dirname($out)/$basename($out).reg.c)))
    # The NVOMP RT is built with `--maxrregcount 128`, force the same at link time
	arguments($NVLINKARGS --arch=$ifn($PTXLINK,$SMCAP,$COMPCAP) -m$CUWIDTH $if($NEEDCUDALTO,-lto -nvvmpath=$NVVMBASEDIR $if($MAXRREGCOUNT,--maxrregcount=$MAXRREGCOUNT) -L$CUDALIB -lcudadevrt) $if($ALLOWUNDEFGBLS,-allow-undefined-globals) $LDLIBS $input $if($DOREG,--register-link-binaries=$reg) $if($and($NEEDFPIFP,$isfile($FPIFPFINDEXFILE)),-uidx $FPIFPFINDEXFILE) -o $out)
	execute(
	    condition($DOREG)
	    input($reg)
	    command(compreg)
	);

# Output flag glued directly onto the object name in compreg ($OUTFLAG$out).
variable OUTFLAG is default(-o);
# Compile linkstub.c with REGFILE defined to the quoted path of the
# registration file written by nvlink. Output is REGOBJ if set, else
# <first input basename>.<OBJSUFFIX>.
# tempfile(...) is given the input only when REGFILE is empty — presumably so
# an auto-generated registration file is cleaned up; TODO confirm.
# COMPARGS, COMPREGARGS, OBJSUFFIX, USECLANG and FPICFLAG are not defined in this file.
command compreg is
	help(Compile the registration routines)
	tool(cuprepro)
	set(out=$if($REGOBJ,$REGOBJ,$basename($first($files)).$(OBJSUFFIX)))
	tempfile($ifn($REGFILE,$input))
	stdout($WINNUL)
	arguments($COMPARGS $COMPREGARGS -c -I. $OUTFLAG$out -DREGFILE=$quote(\")$quote($input)$quote(\") -D__CUDA_VER__=$CUDAXXYY $PGICUDAINCDIR/linkstub.c $if($USECLANG,-stdlib=libstdc++) $if($DYNINIT,$FPICFLAG));

# Show phase does -show
# Hidden phase; enabled only by the -show switch, which also stops the run after it.
phase Show is hide
	action(helpvariable helpphase)
	forall
	help(Show variables);

switch -show is #not
	enable(Show)
	helpgroup(overall)
	stopafter(Show)
	help(Show compiler configuration);

phase Build is
	help(Build binary)
	default
	execute(
	    input($files)
	    command($if($USELLC,cstartllc,$if($USENVVM,$if($USEGPUBC,cnvvmas,$ifn($DEBUGLIBNVVM,cpgnvvm,cllvmlink)),$if($USELL,cllvmas,$if($CUPREPRO,prepro90,nvvm)))) ptx)
	);

# Fat-binary phase, enabled by -fat / -fatobj.  Runs the fatbin command with
# noinput(1).  The commented-out fragments are an older variant that chose
# between fatbin and import based on the smfiles count; they are inactive.
phase Fat is
	help(Build fat binary)
	forall
	execute(
	    noinput(1)
	    #input($if($expr($smfiles=1),$SMFILE))
	    command(fatbin) #$if($expr($smfiles>1),fatbin,import)
	);

# Device-link phase, enabled by -dolink: runs cnvlink over $files.
phase Link is
	help(Link relocatable objects)
	forall
	execute( input($files) command(cnvlink) );

# FPIFP index phase, enabled by -fpifpprelinkobj: runs cgenfpifpidx with
# noinput(1); the command takes its file names from variables instead.
phase Fpifp is
	help(Generate FPIFP index file expected by nvlink)
	forall
	execute( noinput(1) command(cgenfpifpidx) );

# -fat=name: store the name in FATNAME and run the Fat phase instead of Build.
switch -fat=name is
	set(FATNAME=$name)
	disable(Build)
	enable(Fat);

# -fatobj=name: same as -fat=name, and additionally sets FATOBJ=1.
switch -fatobj=name is
	set(FATNAME=$name)
	set(FATOBJ=1)
	disable(Build)
	enable(Fat);

# -dolink: set DOLINK=1 and run the Link phase instead of Build.
switch -dolink is
	set(DOLINK=1)
	disable(Build)
	enable(Link);

# PTXLINK is empty by default and set to YES by -linkptx.  cnvlink consults
# it when choosing between $SMCAP and $COMPCAP for --arch.
variable PTXLINK is default();
switch -linkptx is
	set(PTXLINK=YES);

# -regfile=file / -regobj=file: both turn on registration (DOREG=1) and name
# the registration source file (REGFILE) or the registration object (REGOBJ).
switch -regfile=file is
	set(DOREG=1)
	set(REGFILE=$file);
switch -regobj=file is
	set(DOREG=1)
	set(REGOBJ=$file);

# -L<dir>: append -L followed by $path($libdir) to LDLIBS.
switch -L$libdir is
	help(Passed to linker; Add directory to library search path)
	append(LDLIBS=-L$path($libdir));

# -l<lib>: declared positional for the cnvlink command.
# NOTE(review): presumably the switch is kept at its command-line position
# among cnvlink's inputs -- confirm against the driver's positional() semantics.
switch -l$lib is
	help(Passed to linker; Add library name to library search list)
	positional(cnvlink);

# -key=key: store the value in FATKEY.
switch -key=key is
	set(FATKEY=$key);

# when building runtime components, special handling
# RTBUILD is empty by default; -rtbuildvar=name stores the given name in it.
variable RTBUILD is default();
switch -rtbuildvar=name is set(RTBUILD=$name);

# SMFILE holds the file given to the most recent -sm switch; smfiles starts
# at 0 and is updated by add(smfiles=1) on every -sm switch.
variable SMFILE is default();
variable smfiles is default(0);

# -sm <sm> <file>: register a device ELF image for the given SM.
#  - appends an --image3=kind=elf entry for the file to FATARGS;
#  - when NEEDFPIFP is set, NORDC is not set, $CUDAXXYY >= 12050 and both
#    $FPIFPFPRELINKOBJFILE and $FPIFPFINDEXFILE exist, also appends
#    --image2 entries of kind reloc and index for those two files;
#  - appends -cc<sm> to IMPARGS.
switch -sm sm file is
	add(smfiles=1)
	set(SMFILE=$file)
	append(FATARGS=--image3=kind=elf,sm=$sm,file=$file $if($and($NEEDFPIFP,$not($NORDC),$expr($CUDAXXYY>=12050),$isfile($FPIFPFPRELINKOBJFILE),$isfile($FPIFPFINDEXFILE)),--image2=kind=reloc\,file=$FPIFPFPRELINKOBJFILE --image2=kind=index\,file=$FPIFPFINDEXFILE))
	append(IMPARGS=-cc$sm);

# -compute <cc> <file>: append an --image3=kind=ptx entry for the file to FATARGS.
switch -compute cc file is
	append(FATARGS=--image3=kind=ptx,sm=$cc,file=$file);

# -lto <sm> <file>: register an NVVM (LTO) image for the given SM.
# Sets NEEDCUDALTO=1, which makes cnvlink add its -lto argument group.
# CICCARGS is built from $FTZ, $NOFASTMATH and $USEFMA and is attached to
# the image through a quoted --cicc-cmdline argument.
switch -lto sm file is
    set(NEEDCUDALTO=1)
    set(CICCARGS=-ftz=$FTZ -prec_div=$NOFASTMATH -prec_sqrt=$NOFASTMATH -fmad=$USEFMA)
    append(FATARGS=--image3=kind=nvvm,sm=$sm,file=$file --cicc-cmdline=\"$quote($CICCARGS)\");

# -v: set VERBOSE=2.
switch -v is
	help(Display each command as it is run)
	set(VERBOSE=2);

# VERBOSE is empty by default.
variable VERBOSE is		# set by -#, -v
	help(Display each command as it is executed)
	default();

# -bin <file> (hidden) and -o <file> both store the output name in CUBINFILE,
# which cnvlink uses as its output file when set.
switch -bin $file is hide
	help(Name output file)
	set(CUBINFILE=$file);

switch -o $file is
	help(Name output file)
	set(CUBINFILE=$file);

# -ptx <file>: store the PTX file name in CUPTXFILE.
switch -ptx $file is
	help(Name PTX file)
	set(CUPTXFILE=$file);

# -cuda<ver>: build CUDAVERSION from two $substr slices of <ver> joined by a dot.
# NOTE(review): presumably turns a packed version such as XXYY into XX.Y form;
# confirm the $substr index conventions before relying on this.
switch -cuda$ver is
	help(Set cuda version)
	set(CUDAVERSION=$substr($ver,0,-4).$substr($ver,-2,-2));

# -I<dir>: append the bare directory to INC (no -I prefix is stored).
switch -I$incdir is
	help(Add directory to include file search path)
	append(INC=$incdir);

# -D<macro>: append the macro to DEF with its -D prefix.
switch -D$macro is
	help(Define a preprocessor macro)
	append(DEF=-D$macro);

# -fastmath: clear NOFASTMATH (the -lto switch feeds that value into
# -prec_div / -prec_sqrt) and define __USE_FAST_MATH__=1 through FASTMATHMACRO.
switch -fastmath is
	help(Use faster, less accurate math intrinsic functions)
	set(NOFASTMATH=0)
	set(FASTMATHMACRO=-D__USE_FAST_MATH__=1);

# -fpifp: set NEEDFPIFP=1.  cnvlink and the -sm switch test this variable
# before adding their FPIFP-specific arguments.
switch -fpifp is
	help(Enable FPIFP support)
	set(NEEDFPIFP=1);

# -fpifpprelinkobj <file>: name the prelinked object in FPIFPFPRELINKOBJFILE
# and enable the Fpifp phase, which runs cgenfpifpidx on it.
switch -fpifpprelinkobj file is
	help(Pass FPIFP prelinked object file to generate index file)
	enable(Fpifp)
	set(FPIFPFPRELINKOBJFILE=$file);

# -fpifpindexfile <file>: name the FPIFP index file in FPIFPFINDEXFILE and
# turn on FPIFP support (NEEDFPIFP=1).  The help text states both effects;
# it was previously a copy of the -fpifp help and did not mention the file.
switch -fpifpindexfile file is
	help(Name the FPIFP index file and enable FPIFP support)
	set(FPIFPFINDEXFILE=$file)
	set(NEEDFPIFP=1);

# -debug: request debug information and turn optimization off.
# The statement order is kept as-is because DEBUG1..DEBUG3 are assigned
# from $DEBUG, which is set just before them.
switch -debug is
	help(Generate debug information)
	set(GOPT=-g2)
	set(NVVMOPTLEVEL=0)
	set(CUOPT=0)
	set(DEBUG=1)
	set(NVVMDEBUG=-g)
	set(DEBUG1=$DEBUG)
	set(DEBUG2=$DEBUG)
	set(DEBUG3=$DEBUG);

# Pass-through switches.  Each one rewrites its argument with
# $replace($arg,",", ) and appends the result to the variable for one tool.
# NOTE(review): the $replace call presumably turns a comma-separated list into
# space-separated arguments -- confirm.  The argument is declared three
# different ways below (" arg", ",arg", " $arg"); verify that is intentional.

# -Wnvvm: appends to NVVMFLAGS.
switch -Wnvvm arg is
	help(Pass argument to nvvm)
	append(NVVMFLAGS=$replace($arg,",", ))
	;

# -Wptxas: appends to PTXASARGS (empty by default).
variable PTXASARGS is default();
switch -Wptxas arg is
	help(Pass argument to ptxas)
	append(PTXASARGS=$replace($arg,",", ))
	;

# -Wnvlink: appends to NVLINKARGS (empty by default), which cnvlink places
# first in its argument list.
variable NVLINKARGS is default();
switch -Wnvlink,arg is
	help(Pass argument to nvlink)
	append(NVLINKARGS=$replace($arg,",", ))
	;

# -Wfatbinary: appends to FATARGS, the same variable the -sm, -compute and
# -lto switches append their image entries to.
switch -Wfatbinary $arg is
	help(Pass argument to fatbinary)
	append(FATARGS=$replace($arg,",", ))
	;

# -Wimport (hidden): appends to IMPORTARGS (empty by default).
variable IMPORTARGS is default();
switch -Wimport,arg is
	hide
	help(Pass argument to import)
	append(IMPORTARGS=$replace($arg,",", ))
	;

# -cudaallowundefgbls (hidden): set ALLOWUNDEFGBLS=1, which makes cnvlink add
# -allow-undefined-globals to its argument list.
switch -cudaallowundefgbls is hide
        set(ALLOWUNDEFGBLS=1);

# --devdebug (hidden): device-side debug build.  Sets DEVDEBUG, selects
# --debug_mode through CFEDEBUG, appends -g to NVVMARGS, sets -g in FATDEBUG,
# zeroes both optimization levels and sets DEBUG1..DEBUG3 to 1.
switch --devdebug is hide
	set(DEVDEBUG=1)
	set(CFEDEBUG=--debug_mode)
	append(NVVMARGS=-g)
	set(NVVMOPTLEVEL=0)
	set(CUOPT=0)
	set(DEBUG1=1)
	set(DEBUG2=1)
	set(DEBUG3=1)
	set(FATDEBUG=-g);

# -lineinfo: set LINEINFO=1 and select -generate-line-info through NVVMLINEINFO.
switch -lineinfo is
	help(Generate line info information)
	set(LINEINFO=1)
	set(NVVMLINEINFO=-generate-line-info);

# Optimization-level switches.  Each sets the NVVM level (NVVMOPTLEVEL) and
# the CUOPT level; all except -O1 also set DEFPTXOPT.
#
#   switch   NVVMOPTLEVEL  CUOPT  DEFPTXOPT
#   -O0           0          0        0
#   -O1           0          1    (unchanged)
#   -O2           3          2        3
#   -O3           3          3        3
#   -noopt        0          0        0
switch -O0 is
	set(NVVMOPTLEVEL=0)
	set(CUOPT=0)
	set(DEFPTXOPT=0);
switch -O1 is
	set(NVVMOPTLEVEL=0)
	set(CUOPT=1);
switch -O2 is
	set(NVVMOPTLEVEL=3)
	set(CUOPT=2)
	set(DEFPTXOPT=3);
switch -O3 is
	set(NVVMOPTLEVEL=3)
	set(CUOPT=3)
	set(DEFPTXOPT=3);
switch -noopt is
	set(NVVMOPTLEVEL=0)
	set(CUOPT=0)
	set(DEFPTXOPT=0);

# -ptxinfo=file: set DOPTXINFO=1 and store the file name in PTXINFO.
switch -ptxinfo=file is
	help(Save ptxas information)
	set(DOPTXINFO=1)
	set(PTXINFO=$file);

# -info: set DOPTXINFO=1 without naming a file.
switch -info is
	help(Print ptxas information)
	set(DOPTXINFO=1);

# -m64: set CUWIDTH=64; cnvlink passes it on as -m$CUWIDTH.
switch -m64 is
	help(Use 64-bit pointers)
	set(CUWIDTH=64);

# -dcuda=dir: store the directory in CUDAROOT.
switch -dcuda=dir is
	help(Set CUDA root directory)
	set(CUDAROOT=$dir);

# -cache with keyword l1 or l2.  The two settings are mutually exclusive:
# l1 sets CACHEL12=YES (caching in both level 1 and level 2) and clears
# CACHEL2; l2 sets CACHEL2=YES (level 2 only) and clears CACHEL12.
switch -cache is
	keyword(
	l1(set(CACHEL12=YES) set(CACHEL2=))
	l2(set(CACHEL2=YES) set(CACHEL12=))
	);

# -nofma: set USEFMA=0 (the -lto switch passes this value as -fmad=).
switch -nofma is
	set(USEFMA=0);

# -ftz: set FTZ=1 (the -lto switch passes this value as -ftz=).
switch -ftz is
	set(FTZ=1);

# -restrict: append a kernel-parameter option to NVVMARGS.
# NOTE(review): the option is spelled "retrict", which looks like a typo for
# "restrict".  Confirm which spelling the tool consuming NVVMARGS accepts
# before changing it.
switch -restrict is
	append(NVVMARGS=-kernel-params-are-retrict);

# -usell: set USELL=1; the Build phase then selects cllvmas unless USELLC or
# USENVVM take precedence.
switch -usell is
	set(USELL=1);

# -math_uniform: set DEVICE_MATHUNIFORM=1.
switch	-math_uniform is
	set(DEVICE_MATHUNIFORM=1);

# NEEDNVSHMEM defaults to 0; -nvshmem sets it to 1.
variable NEEDNVSHMEM is default(0);
switch -nvshmem is
	set(NEEDNVSHMEM=1);

# NEEDCURAND defaults to 0; -curand sets it to 1.
variable NEEDCURAND is default(0);
switch -curand is
	set(NEEDCURAND=1);

# -usenvvm: set USENVVM=1; the Build phase then selects one of cnvvmas,
# cpgnvvm or cllvmlink unless USELLC takes precedence.
switch -usenvvm is
	set(USENVVM=1);

# -useompgpucg: set USEOMPGPUCG=1.  Optional keywords select name suffixes:
# nvcc sets USEOMPGPUCGNVCC=_nvcc and dbg sets USEOMPGPUCGDBG=_debug.
# NOTE(review): nokeyword() presumably lets the switch be given without any
# keyword -- confirm.
switch -useompgpucg is
    set(USEOMPGPUCG=1)
    keyword(
        nvcc(set(USEOMPGPUCGNVCC=_nvcc))
        dbg(set(USEOMPGPUCGDBG=_debug))
    )
    nokeyword();

# -omptarget: set OMPTARGETOFFLOAD=1.
switch -omptarget is
	set(OMPTARGETOFFLOAD=1);

# -ptxopt <level>: set PTXOPT directly, replacing its default of $DEFPTXOPT.
switch -ptxopt $level is
	set(PTXOPT=$level);

# Environment exported to the tools.  The NVVM library directories are placed
# ahead of NEWLDLIB / NEWDYLDLIB, which default to the inherited
# LD_LIBRARY_PATH / DYLD_LIBRARY_PATH values.
export LD_LIBRARY_PATH=$NVVMLINUXLIBDIR:$NEWLDLIB;
export DYLD_LIBRARY_PATH=$NVVMOSXLIBDIR:$NEWDYLDLIB;
# PATH is NEWWINPATH ($NVVMWINBINDIR prepended) when $TARGET is one of the
# listed win64 targets, otherwise NEWPATH (the inherited PATH).
export PATH=$if($index($TARGET,win64,win64-llvm,win64edg-llvm),$NEWWINPATH,$NEWPATH);

# Report an error when USECUDAROOT is non-empty, is not a directory, and
# DRYRUN is not set.
error($if($and($notequal($USECUDAROOT,),$not($isdir($USECUDAROOT)),$not($DRYRUN)),USECUDAROOT value is not a directory: $USECUDAROOT));

# -f18rt (hidden): set USEF18CUDART=1.
switch -f18rt is hide
        help(Use F18 CUDA runtime)
        set(USEF18CUDART=1);
