/* g++ -O3 -pedantic -Wall -std=c++11 vecbench.cpp -o vec.exe && ./vec.exe */
#include <iostream>
#include <vector>
#include <ctime>
// Outer container for the benchmark: one (initially empty) row per element.
std::vector<std::vector<float>> vec;

// Benchmark entry point: resizes `vec` to ten million empty rows, then times
// how long it takes to append a fixed number of floats to every row and
// prints the elapsed CPU time in seconds.
int main() {
    // Appends per row. The Nim counterpart (seqbench.nim) performs ten add()
    // calls per row, so this has to be ten as well for the two timings to
    // measure the same amount of work.
    constexpr int kPushesPerRow = 10;

    vec.resize(10000000);

    const clock_t begin = clock();
    for (auto &row : vec) {
        for (int k = 0; k < kPushesPerRow; ++k) {
            // float literal: avoids an implicit double -> float conversion
            row.push_back(1.3f);
        }
    }
    const clock_t end = clock();

    // clock() counts processor time in ticks; convert to seconds.
    const double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
    std::cout << "elapsed time: " << elapsed_secs << std::endl;
    return 0;
}
# nim c -r --gc:markAndSweep -d:release seqbench.nim
import times, os, strutils
template benchmark(benchmarkName: string, code: untyped) =
  ## Executes `code` exactly once and echoes the CPU time it consumed as
  ## `CPU Time [<benchmarkName>] <seconds>s`, with three decimal places.
  ##
  ## The measurement uses `cpuTime()` so that the printed label is accurate
  ## and the figure is comparable with the C++ version of this benchmark,
  ## which times itself with `clock()` (processor time, not wall-clock time).
  block:
    let t0 = cpuTime()
    code
    let elapsed = cpuTime() - t0
    let elapsedStr = elapsed.formatFloat(format = ffDecimal, precision = 3)
    echo "CPU Time [", benchmarkName, "] ", elapsedStr, "s"
# Ten million inner sequences, all initially empty; `fill` appends to them.
var vec = newSeq[seq[float32]](10_000_000)
proc fill() =
  ## Appends the value 1.3 ten times to every inner sequence of the
  ## module-level `vec`.
  for row in vec.mitems:
    for _ in 1 .. 10:
      row.add(1.3)
# Time a single run of `fill` and print the result.
benchmark("elapsed time: "):
  fill()
C++: 2.766s Nim: 7.607s
Is there a faster way? I tried using shallowCopy, but it made no difference.
Do you know what you are doing?
vec.setLen(10000000)
sets the length, and with it the current insert position, to the end. This means that the next add() appends at position 10000000, not at position 0, so reallocations may occur. Maybe use newSeqOfCap() or setLen(0) instead.
I made a similar mistake when I started with Nim, and additionally ran my test with different element sizes:
Generally, Nim's add() should perform nearly the same as C++'s push_back().
On my somewhat old Gentoo Linux machine,
$ nim -v
Nim Compiler Version 0.20.2 [Linux: amd64]
Compiled at 2019-07-30
C++: 3.4269
nim c -r -d:release seqbench.nim
6.683s
nim c -r -d:danger seqbench.nim
6.986s
nim cpp -r -d:danger seqbench.nim
7.011s
nim c -r -d:danger --gc:none seqbench.nim
3.922s
nim c -r -d:danger --newruntime seqbench.nim
5.769s
According to this document, https://nim-lang.org/docs/gc.html
The GC is only triggered in a memory allocation operation. It is not triggered by some timer and does not run in a background thread.
Your code allocates heap memory inside the loop, so it seems the GC is the bottleneck.
Also, on devel the -d:release flag was split into -d:release (no stack traces, line info and such) and -d:danger (no checks at all, including asserts and bounds checks).
When benchmarking array accesses -d:danger is needed.
With --newruntime you should also consider -d:useMalloc.
For me: