My current solution can probably be improved upon. Thanks for any comments, Helmut
# compile nim --threads:on -d:release c -r Count.nim
import random, weave, atomics
# Shared running maximum; `Atomic` lets several threads update it safely.
var GlobalMax: Atomic[float]

# Pointer to the shared maximum, so parallel code can capture the address
# instead of a copy of the value.
let GMax_ptr = addr GlobalMax
proc comp_max(): float =
  ## Stand-in for the per-iteration computation: returns `rand(1.0)` drawn
  ## from the default random number generator.
  result = rand(1.0)
proc atomicMax[T](globMax: var Atomic[T]; localMax: T) =
  ## Atomically raises `globMax` to `localMax` when `localMax` is larger;
  ## leaves `globMax` untouched otherwise.
  ##
  ## `localMax` is only read, so it is taken by value. Callers that pass a
  ## mutable variable keep working, and literals / `let` values are accepted.
  var current = globMax.load
  # A failed `compareExchange` refreshes `current` with the value that is
  # actually stored, so the loop ends as soon as either our value has been
  # written or the shared maximum is already >= `localMax`.
  while localMax > current and
      not compareExchange(globMax, current, localMax):
    discard
# Start the Weave runtime before scheduling any parallel work.
init(Weave)

# Seed the shared maximum below every value `comp_max` can produce.
GlobalMax.store(-1.0)

# The staged loop sits inside `syncScope`; per the Weave documentation the
# scope is a barrier for the work spawned inside it, so the `echo` below is
# expected to see the final maximum.
syncScope():
  parallelForStaged i in 0 .. 1000:
    # Capture the pointer to the shared maximum and go through it in the
    # epilogue, rather than capturing it and then bypassing it.
    captures: {GMax_ptr}
    prologue:
      # Private running maximum for this portion of the iteration range.
      var localMax = -1.0
    loop:
      localMax = max(localMax, comp_max())
    epilogue:
      # Publish the private maximum with a single atomic update.
      atomicMax(GMax_ptr[], localMax)

echo GlobalMax.load
exit(Weave)
Here is a version using a parallel reducer. It requires a special import as I'm not sure about the API yet.
import random, weave, weave/parallel_reduce
proc maxReduce(n: int): float64 =
  ## Returns the largest of the random values drawn while iterating over the
  ## inclusive range `0 .. n`, combined through Weave's parallel reduction.
  ##
  ## The result is obtained by `sync`-ing the `Flowvar` that the reduction
  ## fills in.
  var waitableMax: Flowvar[float64]

  parallelReduceImpl i in 0 .. n, stride = 1:
    reduce(waitableMax):
      prologue:
        # Every prologue seeds its RNG with the same constant (1234).
        var threadLocalRNG = initRand(1234)
        var localMax = -Inf
      fold:
        # Note: all threads will start with the same RNG sequence.
        # Multithreaded RNG is a non-trivial problem, so this shows a simple
        # example since the question is about max.
        localMax = max(localMax, threadLocalRNG.rand(1.0))
      merge(remoteMax):
        # Combine this partial maximum with the awaited remote partial result.
        localMax = max(localMax, sync(remoteMax))
      return localMax

  result = sync(waitableMax)
# Bring up the Weave runtime, run the reduction, print it, shut down.
init(Weave)
let max1M = maxReduce(1_000_000)
echo "maxReduce(1000000): " & $max1M
exit(Weave)
Version(s) with parallelForStaged coming next.
And here is the version using parallelForStaged
import random, weave, locks
proc maxReduce(n: int): float64 =
  ## Returns the largest of the random values drawn while iterating over the
  ## inclusive range `0 .. n`, using `parallelForStaged` with a lock-protected
  ## shared maximum.
  ##
  ## The shared maximum and the lock live in this proc's frame and are handed
  ## to the loop by pointer; the proc waits on the awaitable loop handle
  ## before it reads the result and destroys the lock.

  # Named `sharedMax` rather than `max` so it does not shadow `system.max`,
  # which this proc also calls.
  var sharedMax = -Inf
  let maxAddr = sharedMax.addr
  var lock: Lock
  lock.initLock()
  let lockAddr = lock.addr

  parallelForStaged i in 0 .. n:
    captures: {maxAddr, lockAddr}
    awaitable: maxLoop
    prologue:
      # Every prologue seeds its RNG with the same constant (1234).
      var threadLocalRNG = initRand(1234)
      var localMax = -Inf
    loop:
      # Note: all threads will start with the same RNG sequence.
      # Multithreaded RNG is a non-trivial problem, so this shows a simple
      # example since the question is about max.
      localMax = max(localMax, threadLocalRNG.rand(1.0))
    epilogue:
      # Merge the private maximum into the shared one under the lock.
      lockAddr[].acquire()
      maxAddr[] = max(maxAddr[], localMax)
      lockAddr[].release()

  # Wait for the loop before touching `sharedMax` or tearing down the lock.
  discard sync(maxLoop)
  lock.deinitLock()
  return sharedMax
# Bring up the Weave runtime, run the staged-loop reduction, print it,
# then shut the runtime down.
init(Weave)
let max1M = maxReduce(1_000_000)
echo "maxReduce(1000000): " & $max1M
exit(Weave)
You can find more complex, nested reduction examples on 2D histograms and 2D matrices here (note that I still need to update the awaitable-loop versions, since sync now returns a bool):