```nim
import sequtils, threadpool, times, strutils

# crude timing helper: run `statement` and return the elapsed CPU time
template time(statement: untyped): float =
  let t0 = cpuTime()
  statement
  cpuTime() - t0

# all partitions of n into terms of size at most k (k == 0 means no limit)
proc splitToTerms(n: int, k: int): seq[seq[int]] =
  var kk = k
  if kk == 0: kk = n
  if n == 0:
    return newSeq[seq[int]]()
  result = newSeq[seq[int]]()
  if n <= kk:
    result.add(@[n])
  for i in 1 .. min(n, kk):
    for l in splitToTerms(n - i, i):
      result.add(l & @[i])

var res1: seq[int]
var res2: seq[int]

proc nonthreaded(n: int) =
  for i in 1 .. n:
    res1.add len(splitToTerms(n, i))

proc threaded(n: int) =
  for i in 1 .. n:
    let rs = spawn splitToTerms(n, i)
    res2.add len(^rs)

let n = 50
let t1 = time(nonthreaded(n)).formatFloat(ffDecimal, precision = 3)
echo "Time nonthreaded = ", t1, " s"
echo res1
let t2 = time(threaded(n)).formatFloat(ffDecimal, precision = 3)
echo "Time threaded = ", t2, " s"
echo res2
assert res1 == res2  # both versions must produce the same counts
```
t1 = 11.86 s and t2 = 0.982 s for me. But before `echo res2` writes anything to the console, I wait ~11-12 s after t1 has already been printed. Why?
It's all rather funny.
First of all, if you insist on using std/threadpool and happen to use --gc:orc (or, like me, have it set as the default in your config), you get SIGSEGV: Illegal storage access. (Attempt to read from nil?). ARC is of course much slower than refc here, due to the reckless seq galore.
Also, it's rather hard for a beginner to deduce the correct way to use the threadpool from its docs ("cast?", "so, should I loop until I get -1 or not?").
Finally, I had to use std/monotimes, as the cpuTime measurements for this code are completely off, which is probably expected but not stressed enough in the docs.
```nim
# nim c --threads:on -d:release --gc:arc stt.nim && time ./stt
import std/[threadpool, times, strutils, strformat, monotimes]

proc splitToTerms(n: int, k: int): seq[seq[int]] =
  let kk = if k == 0: n else: k
  if n != 0:
    result = newSeq[seq[int]]()
    if n <= kk:
      result.add(@[n])
    for i in 1 .. min(n, kk):
      for l in splitToTerms(n-1, i):
        result.add(l & @[i])

proc threaded(n: int): seq[int] =
  var futures = newSeq[FlowVarBase](n)
  let startTime = cpuTime()
  let startMT = getMonoTime()
  for i in 1 .. n:  # indexing to keep ordering
    futures[i-1] = spawn splitToTerms(n, i)
  result = newSeq[int](n)
  for _ in 1 .. n:
    let futIdx = futures.blockUntilAny()  # index of a finished task, or -1 when none are left
    if futIdx != -1:
      let r = ^cast[FlowVar[seq[seq[int]]]](futures[futIdx])
      let dur = (getMonoTime() - startMT).inMilliseconds()
      echo &"res#{futIdx+1} len={r.len} in {dur} ms"
      result[futIdx] = r.len()
    else:
      break

const N = 10
echo threaded(N)
```
Sorry, there's a typo in line 11: the first argument to splitToTerms should be n-i, not n-1. Unfortunately, the time limit for editing the post has passed. The corrected program runs faster.
Regarding your other question, I suggest you use getMonoTime instead of cpuTime in threading situations.
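To make the difference concrete, here is a minimal sketch (my own illustration, not from this thread): cpuTime only counts CPU time, so a main thread that mostly sits blocked reports close to zero, while getMonoTime reflects the wall-clock time you actually wait. The sleep call simply stands in for blocking on FlowVars.

```nim
# Minimal sketch: CPU time vs. wall-clock time.
# The sleep stands in for the main thread blocking on FlowVars.
import std/[monotimes, times, os]

let cpuStart = cpuTime()
let monoStart = getMonoTime()

sleep(1000)  # blocked, consuming (almost) no CPU time

echo "cpuTime delta:   ", cpuTime() - cpuStart, " s"                           # ~0 s
echo "monotonic delta: ", (getMonoTime() - monoStart).inMilliseconds, " ms"    # ~1000 ms
```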
Thanks for your example. It's like a middle-level way of handling futures. Is there any high-level Nim API for handling futures? Something like this in Python:
```python
...
with ThreadPoolExecutor(thread_name_prefix='Transport', max_workers=20) as executor:
    executor.submit(self.delman.handle_message, tag=tag) \
        .add_done_callback(self.handle_future)
```
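For reference, here is a rough sketch of how a done-callback in that style could be layered on top of std/threadpool, reusing the blockUntilAny loop from the example above. The names work, handleDone and submitAll are made up for illustration; this is not a stdlib API.

```nim
# Rough sketch, not a stdlib API: invoking a "done" callback from the
# result-collection loop, in the spirit of Python's add_done_callback.
# Compile with: nim c --threads:on sketch.nim
import std/threadpool

proc work(x: int): int =
  x * x

proc handleDone(idx, value: int) =
  # called on the main thread once task `idx` has finished
  echo "task ", idx, " done: ", value

proc submitAll(n: int) =
  var futures = newSeq[FlowVarBase](n)
  for i in 0 ..< n:
    futures[i] = spawn work(i)
  for _ in 0 ..< n:
    let idx = futures.blockUntilAny()  # whichever task finishes next, -1 if none are left
    if idx == -1:
      break
    handleDone(idx, ^cast[FlowVar[int]](futures[idx]))

submitAll(5)
```

In this sketch each callback runs on the main thread as soon as the corresponding task's FlowVar becomes readable, which is the closest approximation of the Python pattern I can offer with the modules already used in this thread.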