In the Nim manual, I found the || iterator, which supports OpenMP pragmas.
I also found a similar case in https://forum.nim-lang.org/t/9858.
Can I use an OpenACC pragma like this?
for i in `||`(a,b, "#pragma acc kernels") :
someFunc(i)
and get C output like this:
#pragma acc kernels
{ for (i = range.a; i <= range.b; ++i) {
someFunc(i)
}
}
Sorry for my bad English.
Unfortunately, the || iterator always prefixes the annotation with #pragma omp. With your example up there you get #pragma omp acc kernels above the annotated loop.
I can only assume that it should be a relatively straightforward compiler fix to make that adjustable / only default to #pragma omp if the given string doesn't start with #pragma, though (the || iterator came up multiple times lately for me and there are quite a few quality improvements it could use).
Thanks! This community is so kind!
Now I tried this simple test.
{.emit:"""
#include "openacc.h"
""".}
proc test() =
  ## Minimal experiment: store the loop index into a raw buffer from a loop
  ## that is annotated through the `||` iterator.
  ##
  ## `||` puts `#pragma omp ` in front of the annotation, so the string
  ## starts with an OpenMP clause (`parallel if(0)`) and then opens a new
  ## line that carries the OpenACC directive.
  ##
  ## NOTE(review): `buf` is declared but never allocated, so it is nil and
  ## every `buf[idx]` store below goes through a nil pointer.
  var buf: ptr UncheckedArray[int]
  for idx in `||`(0, 9,
                  annotation = "parallel if(0) \n#pragma acc kernels"):
    buf[idx] = idx

when isMainModule:
  test()
# compile: nim c --cc:clang --clang.exe="nvc" --clang.linkerexe="nvc" --passC:"-Minfo=all -acc=gpu -gpu=cc86" nvc_test.nim
Then I got this error message:
...
Hint: [Link]
/usr/bin/ld: /home/nimcache/@mnvc_test.nim.c.o: in function `test__nvc95test_u1':
/home/nimcache/@mnvc_test.nim.c:36: undefined reference to `__pgi_uacc_enter'
/usr/bin/ld: /home/nimcache/@mnvc_test.nim.c:34: undefined reference to `__pgi_uacc_computestart2'
/usr/bin/ld: /home/nimcache/@mnvc_test.nim.c:34: undefined reference to `__pgi_uacc_launch'
/usr/bin/ld: /home/nimcache/@mnvc_test.nim.c:36: undefined reference to `__pgi_uacc_computedone'
/usr/bin/ld: /home/nimcache/@mnvc_test.nim.c:36: undefined reference to `__pgi_uacc_noversion'
....
How can this be solved? Does anyone know this problem? I found the answer! I was missing --passL!
nim c --cc:clang --clang.exe="nvc" --clang.linkerexe="nvc" --passC:"-Minfo=accel -acc=gpu -gpu=cc86" --passL:"-acc=gpu -gpu=cc86" -d:release nvc_test.nim
Now it works! Now I can use Nim with OpenACC for HPC on the GPU! Yeah~ Oh... another problem remains... but I have some idea of the cause:
Failing in Thread:1
Accelerator Fatal Error: call to cuStreamSynchronize returned error 700 (CUDA_ERROR_ILLEGAL_ADDRESS): Illegal address during kernel execution
File: /home/nimcache/@mnvc_test.nim.c
Function: test__nvc95test_u1:29
Line: 36
Problem solved!! Now I can use OpenACC in Nim!
import strformat
{.emit:"""
#include "openacc.h"
""".}
const num = 10
proc test() =
  ## Computes `c[i] = sum over j of a[i] * b[j]` for `num` elements inside an
  ## OpenACC `kernels` region and prints `c[7]` before and after.
  ##
  ## The directives are injected verbatim with `{.emit.}` and name the
  ## buffers `a`, `b` and `c`, so these locals must keep exactly those names
  ## (assumes the C backend emits them unmangled -- check the generated C).
  const bufBytes = sizeof(float32) * num
  let
    a = cast[ptr UncheckedArray[float32]](alloc0(bufBytes))
    b = cast[ptr UncheckedArray[float32]](alloc0(bufBytes))
    c = cast[ptr UncheckedArray[float32]](alloc0(bufBytes))

  for i in 0 ..< num:
    a[i] = i.float32
    b[i] = (num - i).float32

  # `echo` stringifies its arguments itself, no explicit `$` needed.
  echo "a[7] = ", a[7], " | b[7] = ", b[7]
  echo "before : c[7] = ", c[7]

  # `a` and `b` are only read on the device, so `copyin` is enough for them.
  # `c` is accumulated into and read back on the host afterwards, so it must
  # be `copy` (host -> device on entry, device -> host on exit). With
  # `copyin` the device results are not transferred back, and a separate
  # `#pragma acc data copyout(...)` placed after the region would only open
  # a new data region around whatever C statement happens to follow it.
  const annot =
    "\n#pragma acc data " &
    fmt"copyin(a[:{num}],b[:{num}]) copy(c[:{num}]) " &
    "\n#pragma acc kernels"
  {.emit: annot.}
  block:
    # The empty annotation makes `||` act as a plain counting loop; the
    # directives emitted above apply to this whole block.
    for i in `||`(0, num - 1, ""):
      for j in `||`(0, num - 1, ""):
        c[i] += a[i] * b[j]

  echo "after : c[7] = ", c[7]

  dealloc(a)
  dealloc(b)
  dealloc(c)

when isMainModule:
  test()
outputs:
a[7] = 7.0 | b[7] = 3.0
before : c[7] = 0.0
after : c[7] = 385.0
If I use an array instead of a ptr UncheckedArray, then I don't need dealloc, right?
import strformat
{.emit:"""
#include "openacc.h"
""".}
const num = 128
proc test() =
  ## Fills `d[j] = j` and `c[i][j] = a[i] * b[j] + d[j]` inside an OpenACC
  ## `kernels` region, using stack-allocated arrays, and prints `c[7][7]`
  ## before and after.
  ##
  ## The directives are injected verbatim with `{.emit.}` and name the
  ## arrays `a`, `b`, `c` and `d`, so these locals must keep exactly those
  ## names (assumes the C backend emits them unmangled -- check the
  ## generated C).
  var
    a, b, d: array[num, cdouble]
    c: array[num, array[num, cdouble]]

  for i in 0 ..< num:
    a[i] = i.cdouble
    b[i] = (num - i).cdouble

  # `echo` stringifies its arguments itself, no explicit `$` needed.
  echo "a[7] = ", a[7], " | b[7] = ", b[7]
  echo "before : c[7][7] = ", c[7][7]

  # `a`, `b` and `d` are not read on the host after the region, so `copyin`
  # is enough for them. `c` is the result and is read on the host
  # afterwards, so it must be `copy` (device -> host on exit). With `copyin`
  # the results are not transferred back, and a separate
  # `#pragma acc data copyout(...)` after the region would only open a new
  # data region around whatever C statement happens to follow it.
  const annot =
    "\n#pragma acc data " &
    fmt"copyin(a[:{num}],b[:{num}],d[:{num}]) copy(c[:{num}][:{num}]) " &
    "\n#pragma acc kernels"
  {.emit: annot.}
  block:
    # The empty annotation makes `||` act as a plain counting loop; the
    # directives emitted above apply to this whole block.
    for jj in `||`(0, num - 1, ""):
      d[jj] = jj.cdouble
    for i in `||`(0, num - 1, ""):
      for j in `||`(0, num - 1, ""):
        c[i][j] = a[i] * b[j] + d[j]

  echo "after : c[7][7] = ", c[7][7]

when isMainModule:
  test()
a[7] = 7.0 | b[7] = 121.0
before : c[7][7] = 0.0
after : c[7][7] = 854.0
If I use an array instead of a ptr UncheckedArray, then I don't need dealloc, right?
The array type is stack-allocated, so there is no need for dealloc, which is for heap memory. It gets freed automatically when its declaring stack frame exits.
Also, the $ operator is redundant for echo.
import strformat
{.emit:"""
#include "openacc.h"
""".}
const num = 512
type
  DataSeq = ref object
    ## Reference-counted bundle of three `cdouble` sequences.
    x: seq[cdouble]
    y: seq[cdouble]
    z: seq[cdouble]

  Data[N: static[int]] = object
    ## Fixed-size counterpart of `DataSeq`: three references to arrays of
    ## exactly `N` `cdouble` values each.
    x: ref array[N, cdouble]
    y: ref array[N, cdouble]
    z: ref array[N, cdouble]
proc newDataSeq(N: static[int], T: typedesc): DataSeq =
  ## Creates a `DataSeq` whose `x`, `y` and `z` each hold `N`
  ## zero-initialised elements created with `newSeq[T]`.
  new result
  result.x = newSeq[T](N)
  result.y = newSeq[T](N)
  result.z = newSeq[T](N)
proc toStatic(m: DataSeq, N: static[int]): Data[N] =
  ## Copies the first `N` elements of `m.x`, `m.y` and `m.z` into three
  ## freshly allocated `array[N, cdouble]` references.
  new(result.x)
  new(result.y)
  new(result.z)
  for idx in 0 .. N - 1:
    result.x[idx] = m.x[idx]
    result.y[idx] = m.y[idx]
    result.z[idx] = m.z[idx]
proc newData(N: static[int], T: typedesc): Data[N] =
  ## Builds a `Data[N]` by creating a `DataSeq` of `N` elements per field
  ## and converting it with `toStatic`.
  toStatic(newDataSeq(N, T), N)
proc test[N](d: Data[N]): array[N, array[N, cdouble]] =
  ## Copies the three arrays of `d` into locals, fills `c[j] = j` and
  ## `m[i][j] = a[i] * b[j] + c[j]` inside an OpenACC `kernels` region and
  ## returns `m`.
  ##
  ## The directives are injected verbatim with `{.emit.}` and name the
  ## locals `a`, `b`, `c` and `m`, so these must keep exactly those names
  ## (assumes the C backend emits them unmangled -- check the generated C).
  ##
  ## NOTE(review): the loops and the pragma bounds use the module-level
  ## `num`, not `N`; this is only in range when `N == num`, and bound checks
  ## are switched off below.
  {.push boundChecks: off.}
  var
    a = d.x[]
    b = d.y[]
    c = d.z[]
    # The pragma text needs a local it can name; `result` is copied in here
    # and assigned back at the end.
    m = result

  for i in 0 ..< num:
    a[i] = i.cdouble
    b[i] = (num - i).cdouble

  # `echo` stringifies its arguments itself, no explicit `$` needed.
  echo "a[7] = ", a[7], " | b[7] = ", b[7], " | c[7] = ", c[7]
  echo "before : m[7][7] = ", m[7][7]

  # `a` and `b` are only read on the device, so `copyin` is enough for them.
  # `c` and `m` are written on the device and read on the host afterwards,
  # so they must be `copy` (device -> host on exit). With `copyin` the
  # results are not transferred back, and a separate
  # `#pragma acc data copyout(...)` after the region would only open a new
  # data region around whatever C statement happens to follow it.
  const annot =
    "\n#pragma acc data " &
    fmt"copyin(a[:{num}],b[:{num}]) copy(c[:{num}],m[:{num}][:{num}]) " &
    "\n#pragma acc kernels"
  {.emit: annot.}
  block:
    # The empty annotation makes `||` act as a plain counting loop; the
    # directives emitted above apply to this whole block.
    for jj in `||`(0, num - 1, ""):
      c[jj] = jj.cdouble
    for i in `||`(0, num - 1, ""):
      for j in `||`(0, num - 1, ""):
        m[i][j] = a[i] * b[j] + c[j]

  echo "after : m[7][7] = ", m[7][7], " | c[7] = ", c[7]
  result = m
  {.pop.}
when isMainModule:
  # Neither binding is reassigned, so `let` is sufficient; `echo`
  # stringifies its arguments itself, so no explicit `$` is needed.
  let d = newData(num, cdouble)
  let m = test(d)
  echo "m[7][7] = ", m[7][7]
I tried using an object that contains a set of arrays. Are there any tips for creating and using an object that contains multiple arrays?