I have this very basic skeleton for a Robin Hood hash. It may contain bugs. But the strange thing is that using one of these two lines makes a gigantic difference in the time needed to run the tests: (Nim v0.17, GCC 6.3)
#var a: array[N2, string]
var a = newSeq[string](N2)
import random
import hashes
import times
import math
import bitops
import strutils
# Reduce an index or hash value to a valid bucket index by masking with
# `m.hi` (the buffer size is a power of two, so `m.hi` is an all-ones mask).
# `{.dirty.}` makes the template pick up the caller's local `m`.
template masked(i: untyped): typed {.dirty.}=
  i and m.hi
type
  Entry[K, V] = object
    k: K        # key
    v: V        # value
    p, n: int   # previous, next slot index of the insertion-order list
                # NOTE(review): `p` is never written by `insert` — TODO confirm
  Map[K, V] = object
    hv: seq[int]           # hash value per bucket; 0 marks an empty bucket
    d: seq[Entry[K, V]]    # data buckets, parallel to `hv`
    first, last: int       # head/tail of the insertion-order list
                           # NOTE(review): `last` is never updated — TODO confirm
    numentries: int        # entry count; not maintained by `insert` in this skeleton
    hi: int                # buffer size - 1; power-of-two bit mask used by `masked`
proc hnz[K](x: K): Hash =
  ## Hash of `x` that is guaranteed to be non-zero, because zero is reserved
  ## as the "empty bucket" marker in `Map.hv`. Keys are hashed via their
  ## string representation, so any type with a `$` operator works.
  let raw = hash($x)
  result = if raw != 0: raw else: -1
proc newTable[K, V](initialCapacity: Natural; maxFillRate: range[10 .. 95] = 50): Map[K, V] =
  ## Create an empty map whose hash buffer size is always a power of two.
  ##
  ## `maxFillRate` is the desired maximum fill of the hash buffer in percent
  ## (10..95); small values give better lookup performance. If
  ## `initialCapacity` is already a power of two, the buffer starts at exactly
  ## that size; otherwise it is scaled up so that `initialCapacity` entries
  ## respect `maxFillRate`, then rounded to the next power of two. Doubling
  ## the buffer when the actual fill rate exceeds the desired one is intended
  ## but not implemented in this skeleton.
  var size = initialCapacity
  if countSetBits(size) > 1:            # not a power of two: apply the fill rate
    size =
      if size < 10_000_000:             # multiply first for accuracy ...
        size * 100 div maxFillRate
      else:                             # ... or divide first to dodge 32-bit overflow
        size div maxFillRate * 100
  size = nextPowerOfTwo(size)
  result.hv = newSeq[int](size)         # zero-filled: every bucket starts empty
  result.d = newSeq[Entry[K, V]](size)
  result.numentries = 0
  result.hi = size - 1                  # power-of-two mask consumed by `masked`
proc insert[K, V](m: var Map[K, V]; key: K; val: V) =
  ## Robin-Hood-style insert of (key, val); also links the new slot into the
  ## insertion-order list via `first`/`n`.
  ## NOTE(review): no duplicate-key check and no growth/occupancy check —
  ## filling every bucket makes the probe loop spin forever; `numentries` and
  ## `last` are never updated. TODO confirm this is intentional for a skeleton.
  let h = hnz(key)            # non-zero hash; 0 marks an empty bucket
  let p = masked(h)           # home bucket of the key
  var i = p
  var j = p
  # Linear probe forward to the first empty bucket.
  while m.hv[masked(i)] != 0:
    inc(i)
  # Walk back from the empty bucket, shifting entries toward it while their
  # probe distance is smaller than ours (Robin Hood displacement).
  while true:
    j = i
    # NOTE(review): on the first iteration `j` may still be un-masked (the
    # probe loop above increments raw `i`), so `m.hv[j]` below could index
    # out of bounds — suspect bug, TODO confirm.
    i = masked(i - 1)
    if i - masked(m.hv[i]) < i - p:
      # NOTE(review): both differences can go negative on wrap-around; the
      # usual Robin Hood distance is (i - home) mod tableSize — suspect bug.
      m.hv[j] = m.hv[i]
      m.d[j] = m.d[i]
    else:
      break
  m.hv[j] = h
  m.d[j].v = val
  m.d[j].k = key
  m.d[j].n = m.first          # prepend to the insertion-order list
  m.first = j
proc contains[K, V](m: Map[K, V]; key: K): bool =
  ## True if `key` is stored in the map.
  let h = hnz(key)
  let p = masked(h)           # home bucket
  var i = p
  while true:
    let hv = m.hv[i]
    if hv == h:
      # NOTE(review): returns on the FIRST bucket with a matching hash value;
      # a different key that collides on the same hash would make this yield
      # `false` instead of probing further — suspect bug, TODO confirm.
      return m.d[i].k == key
    elif hv == 0 or masked(hv) > p:
      # Empty bucket, or the resident entry's home bucket lies past ours —
      # with Robin Hood ordering the key cannot be further along.
      # NOTE(review): this cutoff ignores wrap-around at the buffer end and
      # probably misclassifies wrapping clusters — TODO confirm.
      return false
    i = masked(i + 1)
proc get[K, V](m: Map[K, V]; key: K): V =
  ## Return the value stored for `key`; raises `KeyError` when absent.
  let h = hnz(key)
  let p = masked(h)           # home bucket
  var i = p
  while true:
    let hv = m.hv[i]
    if hv == h and m.d[i].k == key:
      return m.d[i].v
    elif hv == 0 or masked(hv) > p:
      # Empty bucket, or the resident entry's home bucket lies past ours —
      # with Robin Hood ordering the key cannot be further along.
      # NOTE(review): this cutoff ignores wrap-around at the buffer end —
      # probably fails for clusters that wrap; TODO confirm.
      when compiles($key):
        raise newException(KeyError, "key not found: " & $key)
      else:
        raise newException(KeyError, "key not found")
    i = masked(i + 1)
proc test() =
  ## Smoke test and micro-benchmark: insert half of the shuffled keys,
  ## verify membership and values, verify the other half is absent, then
  ## time raw inserts and lookups into a fresh table.
  const
    N = 1024 * 32
    N2 = N div 2
  var m = newTable[string, int](N)
  #var a: array[N2, string]
  var a = newSeq[string](N2)   # seq variant: only one GC root on the stack
  for i in 0 .. a.high: a[i] = $i
  a.shuffle                    # deterministic here (no `randomize()` call)
  #echo "aaa"
  for i in 0 .. a.len div 2 - 1:
    m.insert(a[i], parseInt(a[i]))
  #echo "bbb"
  for i in 0 .. a.len div 2 - 1:
    assert(m.contains(a[i]))
    assert(m.get(a[i]) == parseInt(a[i]))
  #echo "ccc"
  for i in a.len div 2 .. a.high:
    assert(not m.contains(a[i]))
  #GC_disable()
  m = newTable[string, int](N)
  var t = cpuTime()
  for p, v in a:
    m.insert(v, p)
  echo((cpuTime() - t) / a.len.float)   # mean seconds per insert
  #echo "ddd"
  t = cpuTime()
  for p, v in a:
    doassert(m.get(v) >= 0)   # `doassert` == `doAssert` (style-insensitive names)
  echo((cpuTime() - t) / a.len.float)   # mean seconds per lookup
# Run the smoke test / benchmark only when compiled as the main module.
when isMainModule:
  test()
$ time ./t
5.877685546874999e-08
3.784179687499999e-08
real 0m0.005s
user 0m0.005s
sys 0m0.000s
$ time ./t
0.0004289649047851563
4.602050781242744e-08
real 0m12.911s
user 0m12.906s
sys 0m0.000s
My initial guess was a stack overflow, meaning the code would not be working properly at all. But when I compile with plain "nim c t.nim" I get no runtime error. Another idea was pass-by-value vs. pass-by-reference, but I cannot currently see that being the cause.
It is likely an issue with the GC. An easy way to detect where your program spends the most time is to send a SIGINT (CTRL+C in the terminal) and analyse the traceback.
I repeated the operation a few times and it was always inside some method (grow, CellSet) in gc.nim I quickly checked with another GC --gc:markAndSweep and the problem doesn't occur.
It is roughly as fast as with a seq
Thanks for your investigations. GC was really not on my list of possible sources.
(And indeed, with --gc:markAndSweep all is fine!)
Good to know.
Better to know exactly. (Well, soon we may get the book from Manning; they took half a year to elaborate Dom's draft, so hopefully they explain all these details... :-)
But seriously, I assume your conclusion is not correct. I myself am still surprised by that behaviour, and I think it may be a trap for many beginners. But I think the special aspect of my code is that the array contains strings, and that strings are watched by the GC. My code was only intended for initial testing; generally I would tend to avoid such large objects on the stack, primarily to avoid stack overflows. And I have to admit that I currently do not understand why the seq is no problem for the GC, as it also contains all the strings.
I do not understand currently why the seq is no problem for the GC
I don't know either, but I guess it's because the GC has to check all the roots on the stack. For the seq it only has to check one root (the strings inside the seq are reference counted), while for the array it has to check 16k roots.
I just made a small test and increased the ref count of each string in the array at the beginning and decreased it at the end. The result is, that the time of array variant decreased from 15sec to 2 sec on my PC.
import random
import hashes
import times
import math
import bitops
import strutils
# Reduce an index or hash value to a valid bucket index by masking with
# `m.hi` (buffer size is a power of two, so `m.hi` is an all-ones mask);
# `{.dirty.}` makes the template pick up the caller's local `m`.
template masked(i: untyped): typed {.dirty.}=
  i and m.hi
type
  Entry[K, V] = object
    k: K        # key
    v: V        # value
    p, n: int   # previous, next slot index of the insertion-order list
  Map[K, V] = object
    hv: seq[int]           # hash value per bucket; 0 marks an empty bucket
    d: seq[Entry[K, V]]    # data buckets, parallel to `hv`
    first, last: int       # head/tail of the insertion-order list
    numentries: int        # entry count (not maintained by `insert`)
    hi: int                # buffer size - 1; power-of-two mask for `masked`
proc hnz[K](x: K): Hash =
  ## Non-zero hash of `x`; zero is reserved as the empty-bucket marker.
  result = hash($x)
  if result == 0:
    result = -1
proc newTable[K, V](initialCapacity: Natural; maxFillRate: range[10 .. 95] = 50): Map[K, V] =
  ## `maxFillRate` is the desired maximum fill of the hash buffer in percent;
  ## the value should be in the range 10 to 95%. A small value gives optimal
  ## performance.
  ## If `initialCapacity` is a power of 2, we start with a buffer of that size;
  ## otherwise we create an initial buffer that can contain `initialCapacity`
  ## elements, taking into account the desired maximum fill rate.
  ## Whenever the actual fill rate is larger than the desired value, the buffer
  ## should be increased by a factor of two (growth is not implemented in this
  ## skeleton). The actual buffer size is always a power of two.
  # we may care a bit about arithmetic overflow on pure 32 bit: cap = 1e8, fillrate 100%
  var h = initialCapacity
  if countSetBits(h) > 1:   # not already a power of two: apply the fill rate
    if h < 1e7.int: # multiply first for accuracy, but prevent overflow on 32 bit
      h = h * 100 div maxFillrate
    else:
      h = h div maxFillrate * 100
  # if h < 8: h = 8 # tiny values make no real sense and may generate trouble
  h = nextPowerOfTwo(h)
  #result.data = newSeq[Entry[K, V]](result.hi)
  #dec(result.hi)
  result.numentries = 0
  #result.fillrate = maxFillrate
  #if result.hi < 1e7.int:
  #  result.cfr = result.hi * fillrate div 100
  #else:
  #  result.cfr = result.hi div 100 * fillrate
  #assert(result.cfr < result.hi)
  result.d = newSeq[Entry[K, V]](h)
  result.hv = newSeq[int](h)    # zero-filled: every bucket starts empty
  result.hi = h - 1             # power-of-two mask consumed by `masked`
proc insert[K, V](m: var Map[K, V]; key: K; val: V) =
  ## Robin-Hood-style insert of (key, val); also links the new slot into the
  ## insertion-order list via `first`/`n`.
  ## NOTE(review): no duplicate-key check and no growth/occupancy check —
  ## filling every bucket makes the probe loop spin forever; `numentries` and
  ## `last` are never updated. TODO confirm this is intentional for a skeleton.
  let h = hnz(key)            # non-zero hash; 0 marks an empty bucket
  let p = masked(h)           # home bucket of the key
  var i = p
  var j = p
  # Linear probe forward to the first empty bucket.
  while m.hv[masked(i)] != 0:
    inc(i)
  # Walk back from the empty bucket, shifting entries toward it while their
  # probe distance is smaller than ours (Robin Hood displacement).
  while true:
    j = i
    # NOTE(review): on the first iteration `j` may still be un-masked (the
    # probe loop above increments raw `i`), so `m.hv[j]` below could index
    # out of bounds — suspect bug, TODO confirm.
    i = masked(i - 1)
    if i - masked(m.hv[i]) < i - p:
      # NOTE(review): both differences can go negative on wrap-around; the
      # usual Robin Hood distance is (i - home) mod tableSize — suspect bug.
      m.hv[j] = m.hv[i]
      m.d[j] = m.d[i]
    else:
      break
  m.hv[j] = h
  m.d[j].v = val
  m.d[j].k = key
  m.d[j].n = m.first          # prepend to the insertion-order list
  m.first = j
proc contains[K, V](m: Map[K, V]; key: K): bool =
  ## True if `key` is stored in the map.
  let h = hnz(key)
  let p = masked(h)           # home bucket
  var i = p
  while true:
    let hv = m.hv[i]
    if hv == h:
      # NOTE(review): returns on the FIRST bucket with a matching hash value;
      # a different key that collides on the same hash would make this yield
      # `false` instead of probing further — suspect bug, TODO confirm.
      return m.d[i].k == key
    elif hv == 0 or masked(hv) > p:
      # Empty bucket, or the resident entry's home bucket lies past ours —
      # with Robin Hood ordering the key cannot be further along.
      # NOTE(review): this cutoff ignores wrap-around at the buffer end and
      # probably misclassifies wrapping clusters — TODO confirm.
      return false
    i = masked(i + 1)
proc get[K, V](m: Map[K, V]; key: K): V =
  ## Return the value stored for `key`; raises `KeyError` when absent.
  let h = hnz(key)
  let p = masked(h)           # home bucket
  var i = p
  while true:
    let hv = m.hv[i]
    if hv == h and m.d[i].k == key:
      return m.d[i].v
    elif hv == 0 or masked(hv) > p:
      # Empty bucket, or resident entry's home bucket lies past ours — absent.
      # NOTE(review): the cutoff ignores wrap-around at the buffer end.
      when compiles($key):
        raise newException(KeyError, "key not found: " & $key)
      else:
        raise newException(KeyError, "key not found")
    i = masked(i + 1)
proc test() =
  ## Same smoke test / micro-benchmark, but with the stack `array` variant
  ## plus explicit GC_ref/GC_unref on every string — the experiment that cut
  ## the array variant's runtime from ~15 s to ~2 s by reducing the number of
  ## stack roots the GC must scan.
  const
    N = 1024 * 32
    N2 = N div 2
  var m = newTable[string, int](N)
  var a: array[N2, string]     # 16k string roots on the stack for the GC
  #var a = newSeq[string](N2)
  for i in 0 .. a.high: a[i] = $i
  # increase the ref count of each string in the array
  for i in 0 .. a.high: GC_ref(a[i])
  a.shuffle                    # deterministic here (no `randomize()` call)
  #echo "aaa"
  for i in 0 .. a.len div 2 - 1:
    m.insert(a[i], parseInt(a[i]))
  #echo "bbb"
  for i in 0 .. a.len div 2 - 1:
    assert(m.contains(a[i]))
    assert(m.get(a[i]) == parseInt(a[i]))
  #echo "ccc"
  for i in a.len div 2 .. a.high:
    assert(not m.contains(a[i]))
  #GC_disable()
  m = newTable[string, int](N)
  var t = cpuTime()
  for p, v in a:
    m.insert(v, p)
  echo((cpuTime() - t) / a.len.float)   # mean seconds per insert
  #echo "ddd"
  t = cpuTime()
  for p, v in a:
    doassert(m.get(v) >= 0)   # `doassert` == `doAssert` (style-insensitive names)
  echo((cpuTime() - t) / a.len.float)   # mean seconds per lookup
  # decrease it again so the strings can be collected
  for i in 0 .. a.high: GC_unref(a[i])
# Run the smoke test / benchmark only when compiled as the main module.
when isMainModule:
  test()