Should I avoid overriding methods whenever possible?
from times import epochtime
from strutils import format
type
  # First hierarchy: `p` is declared on TestA and overridden on TestB and TestC.
  TestA = ref object of RootObj
  TestB = ref object of TestA
  TestC = ref object of TestB
  # Second hierarchy: `p` is declared on TestD only; TestE and TestF add no override.
  TestD = ref object of RootObj
  TestE = ref object of TestD
  TestF = ref object of TestE
# `p` returns `x + 2`. TestA declares the base method; TestB and TestC override it
# with the same body, so the TestA family needs runtime dispatch.
method p(self: TestA, x: int): int {.base.} = x + 2
method p(self: TestB, x: int): int = x + 2
method p(self: TestC, x: int): int = x + 2
# Independent base method for the TestD hierarchy; this snippet defines no override for it.
method p(self: TestD, x: int): int {.base.} = x + 2
template bench(n: int, title: string, obj: untyped): untyped =
  ## Calls `p(i)` on `obj` for every `i` in `0 ..< n`, sums the results and
  ## echoes one line with `title`, the sum and the elapsed time in nanoseconds.
  ##
  ## * `n`     - number of iterations.
  ## * `title` - label printed at the start of the result line.
  ## * `obj`   - expression producing the receiver. It is evaluated once, after
  ##   the start time has been taken, so its cost is part of the reported time.
  let startTime = epochtime()
  let tst = obj
  var res = 0
  for i in 0..<n: res += tst.p(i)
  # `epochtime()` yields seconds as a float; scale to nanoseconds before truncating.
  echo "$# result:$# time:$# ns".format(title, res, int((epochtime() - startTime) * 1_000_000_000))
# Iteration count shared by every benchmark run below.
let n = 1_000_000_000
echo n, " times"
# One run per receiver type; A, B and C go through the overridden method family.
bench(n, "A", TestA())
bench(n, "B", TestB())
bench(n, "C", TestC())
bench(n, "D", TestD())
result
1000000000 times
A result:500000001500000000 time:5792839050 ns
B result:500000001500000000 time:4851602077 ns
C result:500000001500000000 time:4816570043 ns
D result:500000001500000000 time:1192 ns
nimcache (clang -c -w -O3 )
/* Generated implementation of `p` taking a TestA receiver: returns x + 2. `self` is not read. */
N_LIB_PRIVATE N_NIMCALL(NI, p_Aw3lesFExZt7MqrTSldVkA)(tyObject_TestAcolonObjectType__my7mzkOUxsMZjO5v1YyvCg* self, NI x) {
NI result;
{ result = (NI)0;
result = (NI)(x + ((NI) 2));
goto BeforeRet_;
}BeforeRet_: ;
return result;
}
/* Generated implementation of `p` taking a TestB receiver: returns x + 2. `self` is not read. */
N_LIB_PRIVATE N_NIMCALL(NI, p_dm2n08nTnaT9czYrAnBLG9cQ)(tyObject_TestBcolonObjectType__9bi1P9aySy82QHeC9aSv1GsBw* self, NI x) {
NI result;
{ result = (NI)0;
result = (NI)(x + ((NI) 2));
goto BeforeRet_;
}BeforeRet_: ;
return result;
}
/* Generated implementation of `p` taking a TestC receiver: returns x + 2. `self` is not read. */
N_LIB_PRIVATE N_NIMCALL(NI, p_T9bkbRDmlSvDBsMMNFJRmdg)(tyObject_TestCcolonObjectType__agr0Oxf9aZJtF1PQMpDWW8Q* self, NI x) {
NI result;
{ result = (NI)0;
result = (NI)(x + ((NI) 2));
goto BeforeRet_;
}BeforeRet_: ;
return result;
}
/* Generated implementation of `p` taking a TestD receiver: returns x + 2. `self` is not read. */
N_LIB_PRIVATE N_NIMCALL(NI, p_7IeQfxWq7IzSKT9biOTaLAA)(tyObject_TestDcolonObjectType__QU8t7UNc2UQQ3cCM9bHpnHw* self, NI x) {
NI result;
{ result = (NI)0;
result = (NI)(x + ((NI) 2));
goto BeforeRet_;
}BeforeRet_: ;
return result;
}
/* Generated dispatcher for `p` on the TestA family: picks an implementation from the
   runtime type stored in self->Sup.m_type and forwards (self, x) to it.
   The checks run most-derived first: TestC, then TestB, then TestA. */
N_LIB_PRIVATE N_NIMCALL(NI, p_i5lHfL9bd85EwAlh0eEVYOQ)(tyObject_TestAcolonObjectType__my7mzkOUxsMZjO5v1YyvCg* self, NI x) {
NI result;
{ result = (NI)0;
/* Receiver check performed before dispatching; chckNilDisp is defined outside this excerpt. */
chckNilDisp(self);
{
/* Pointer-equality test against TestC's type info -> TestC implementation. */
if (!((self) && ((*self).Sup.m_type == (&NTI_agr0Oxf9aZJtF1PQMpDWW8Q_)))) goto LA3_;
result = p_T9bkbRDmlSvDBsMMNFJRmdg(((tyObject_TestCcolonObjectType__agr0Oxf9aZJtF1PQMpDWW8Q*) (self)), x);
goto BeforeRet_;
}
goto LA1_;
LA3_: ;
{
/* isObjWithCache against TestB's type info (presumably a cached "is a TestB" test; helper not shown) -> TestB implementation. */
if (!((self) && (isObjWithCache((*self).Sup.m_type, (&NTI_9bi1P9aySy82QHeC9aSv1GsBw_), Nim_OfCheck_CACHE13)))) goto LA6_;
result = p_dm2n08nTnaT9czYrAnBLG9cQ(((tyObject_TestBcolonObjectType__9bi1P9aySy82QHeC9aSv1GsBw*) (self)), x);
goto BeforeRet_;
}
goto LA1_;
LA6_: ;
{
/* Same helper against TestA's type info -> TestA implementation. */
if (!((self) && (isObjWithCache((*self).Sup.m_type, (&NTI_my7mzkOUxsMZjO5v1YyvCg_), Nim_OfCheck_CACHE14)))) goto LA9_;
result = p_Aw3lesFExZt7MqrTSldVkA(self, x);
goto BeforeRet_;
}
goto LA1_;
LA9_: ;
/* No branch matched: fall through and return the initial result, 0. */
LA1_: ;
}BeforeRet_: ;
return result;
}
/* Generated dispatcher for `p` on TestD: forwards straight to the single TestD
   implementation, with no type test and no chckNilDisp call (contrast with the TestA dispatcher). */
N_LIB_PRIVATE N_NIMCALL(NI, p_kWP9alFwS9aDbbtge4Kk9cdQA)(tyObject_TestDcolonObjectType__QU8t7UNc2UQQ3cCM9bHpnHw* self, NI x) {
NI result;
{ result = (NI)0;
result = p_7IeQfxWq7IzSKT9biOTaLAA(self, x);
goto BeforeRet_;
}BeforeRet_: ;
return result;
}
add
# Iteration count shared by every benchmark run below.
let n = 1_000_000_000
echo n, " times"
# A, B and C exercise the overridden method family.
bench(n, "A", TestA())
bench(n, "B", TestB())
bench(n, "C", TestC())
# D, E and F share the TestD hierarchy, where `p` has no override.
bench(n, "D", TestD())
bench(n, "E", TestE())
bench(n, "F", TestF())
result
1000000000 times
A result:500000001500000000 time:6607109069 ns
B result:500000001500000000 time:5303513050 ns
C result:500000001500000000 time:5023578166 ns
D result:500000001500000000 time:0 ns
E result:500000001500000000 time:0 ns
F result:500000001500000000 time:953 ns
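I examined the generated assembly: in this very simple case, the optimization does not seem to be applied to the overridden methods (A, B, C), while the loop for the non-overridden method (D) is optimized away.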
However, I don't think this is worth worrying about much, except when performing a very large number of very simple calculations, where a method without overrides would let the compiler optimize heavily.
For example, with sqrt the difference is much smaller.
thanks
simple x2 nim
from times import epochtime
from strutils import format
from math import sqrt
type
  # TestA declares `p`; TestB overrides it.
  TestA = ref object of RootObj
  TestB = ref object of TestA
  # TestD declares its own `p`, which is never overridden in this snippet.
  TestD = ref object of RootObj
# `p` returns `x * 2`. TestA declares the base method and TestB overrides it with the same body.
method p(self: TestA, x: int): int {.base.} = x * 2
method p(self: TestB, x: int): int = x * 2
# Independent base method for TestD; this snippet defines no override for it.
method p(self: TestD, x: int): int {.base.} = x * 2
template bench(n: int, title: string, obj: untyped): untyped =
  ## Calls `p(i)` on `obj` for every `i` in `0 ..< n`, sums the results and
  ## echoes one line with `title`, the sum and the elapsed time in nanoseconds.
  ##
  ## * `n`     - number of iterations.
  ## * `title` - label printed at the start of the result line.
  ## * `obj`   - expression producing the receiver. It is evaluated once, after
  ##   the start time has been taken, so its cost is part of the reported time.
  let startTime = epochtime()
  let tst = obj
  var res = 0
  for i in 0..<n: res += tst.p(i)
  # `epochtime()` yields seconds as a float; scale to nanoseconds before truncating.
  echo "$# result:$# time:$# ns".format(title, res, int((epochtime() - startTime) * 1_000_000_000))
# Iteration count shared by every benchmark run below.
let n = 1_000_000_000
echo n, " times"
# A and B go through the overridden method family; D has a single implementation.
bench(n, "A", TestA())
bench(n, "B", TestB())
bench(n, "D", TestD())
simple x2 result
1000000000 times
A result:999999999000000000 time:4097547054 ns
B result:999999999000000000 time:3608579874 ns
D result:999999999000000000 time:953 ns
simple x2 assembly TestA
callq _addZCT_fCDI7oO1NNVXXURtxSzsRw
LBB9_5:
movq _tst_doe6pEB1jyWn6yLs773CEw@GOTPCREL(%rip), %rax
movq %rbx, (%rax)
movq _res_JV6l78E23Gc61irJB5Ow7w@GOTPCREL(%rip), %rax
movq $0, (%rax)
movq _n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
cmpq $0, (%rax)
jle LBB9_21
## BB#6:
movl $1, %r15d
xorl %r12d, %r12d
movq _i_vyCYEvJNp9c9b1bD8JVdQqrQ@GOTPCREL(%rip), %r13
xorl %r14d, %r14d
jmp LBB9_8
.p2align 4, 0x90
LBB9_7: ## in Loop: Header=BB9_8 Depth=1
incq %r14
movq _tst_doe6pEB1jyWn6yLs773CEw@GOTPCREL(%rip), %rax
movq (%rax), %rbx
incq %r15
addq $2, %r12
LBB9_8: ## =>This Inner Loop Header: Depth=1
leaq -1(%r15), %rax
movq %rax, (%r13)
movq %rbx, %rdi
callq _chckNilDisp
testq %rbx, %rbx
je LBB9_16
## BB#9: ## in Loop: Header=BB9_8 Depth=1
movq (%rbx), %rdi
cmpq _NTI_7313k2M1TRNjWfEhebZVKw_@GOTPCREL(%rip), %rdi
je LBB9_17
## BB#10: ## in Loop: Header=BB9_8 Depth=1
cmpq _NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rdi
je LBB9_15
## BB#11: ## in Loop: Header=BB9_8 Depth=1
movq _NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rax
cmpq %rax, 16(%rdi)
je LBB9_15
## BB#12: ## in Loop: Header=BB9_8 Depth=1
cmpq %rdi, _Nim_OfCheck_CACHE11(%rip)
je LBB9_16
## BB#13: ## in Loop: Header=BB9_8 Depth=1
cmpq %rdi, _Nim_OfCheck_CACHE11+8(%rip)
je LBB9_15
## BB#14: ## in Loop: Header=BB9_8 Depth=1
movq _NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rsi
leaq _Nim_OfCheck_CACHE11(%rip), %rdx
callq _isObjSlowPath_k9bdq9bQE075AR7scLFt5wIg
testb %al, %al
je LBB9_16
.p2align 4, 0x90
LBB9_15: ## in Loop: Header=BB9_8 Depth=1
movq %r12, %rax
jmp LBB9_18
.p2align 4, 0x90
LBB9_16: ## in Loop: Header=BB9_8 Depth=1
xorl %eax, %eax
jmp LBB9_18
.p2align 4, 0x90
LBB9_17: ## in Loop: Header=BB9_8 Depth=1
leaq (%r14,%r14), %rax
LBB9_18: ## in Loop: Header=BB9_8 Depth=1
movq _res_JV6l78E23Gc61irJB5Ow7w@GOTPCREL(%rip), %rcx
addq %rax, (%rcx)
movq _n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
cmpq (%rax), %r15
jl LBB9_7
LBB9_21:
leaq _TM_ipcYmBC9bj9a1BW35ABoB1Kw_5(%rip), %rdi
callq _copyString
simple x2 assembly TestD
callq _addZCT_fCDI7oO1NNVXXURtxSzsRw
LBB9_47:
movq %rbx, (%r15)
movq _res_JV6l78E23Gc61irJB5Ow7w_3@GOTPCREL(%rip), %rbx
movq $0, (%rbx)
movq _n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
movq (%rax), %rax
testq %rax, %rax
jle LBB9_49
## BB#48:
leaq -1(%rax), %rcx
leaq -2(%rax), %rdx
imulq %rcx, %rdx
andq $-2, %rdx
leaq -2(%rdx,%rax,2), %rax
movq _i_vyCYEvJNp9c9b1bD8JVdQqrQ_3@GOTPCREL(%rip), %rdx
movq %rcx, (%rdx)
movq %rax, (%rbx)
LBB9_49:
leaq _TM_ipcYmBC9bj9a1BW35ABoB1Kw_9(%rip), %rdi
callq _copyString
sqrt nim
from times import epochtime
from strutils import format
from math import sqrt
type
  # TestA declares `p`; TestB overrides it.
  TestA = ref object of RootObj
  TestB = ref object of TestA
  # TestD declares its own `p`, which is never overridden in this snippet.
  TestD = ref object of RootObj
# `p` returns the square root of `x`. TestA declares the base method and TestB overrides it with the same body.
method p(self: TestA, x: float64): float64 {.base.} = sqrt(x)
method p(self: TestB, x: float64): float64 = sqrt(x)
# Independent base method for TestD; this snippet defines no override for it.
method p(self: TestD, x: float64): float64 {.base.} = sqrt(x)
template bench(n: int, title: string, obj: untyped): untyped =
  ## Calls `p(float64(i))` on `obj` for every `i` in `0 ..< n`, sums the results
  ## as a float and echoes one line with `title`, the sum and the elapsed time
  ## in nanoseconds.
  ##
  ## * `n`     - number of iterations.
  ## * `title` - label printed at the start of the result line.
  ## * `obj`   - expression producing the receiver. It is evaluated once, after
  ##   the start time has been taken, so its cost is part of the reported time.
  let startTime = epochtime()
  let tst = obj
  var res = 0.0
  for i in 0..<n: res += tst.p(float64(i))
  # `epochtime()` yields seconds as a float; scale to nanoseconds before truncating.
  echo "$# result:$# time:$# ns".format(title, res, int((epochtime() - startTime) * 1_000_000_000))
# Iteration count shared by every benchmark run below.
let n = 1_000_000_000
echo n, " times"
# A and B go through the overridden method family; D has a single implementation.
bench(n, "A", TestA())
bench(n, "B", TestB())
bench(n, "D", TestD())
sqrt result
1000000000 times
A result:21081851051977.78 time:4417852163 ns
B result:21081851051977.78 time:4571517229 ns
D result:21081851051977.78 time:4466888189 ns
sqrt assembly TestA
callq _addZCT_fCDI7oO1NNVXXURtxSzsRw
LBB9_5:
movq %r15, (%r13)
movq _res_JV6l78E23Gc61irJB5Ow7w@GOTPCREL(%rip), %r14
movq $0, (%r14)
movq _n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
cmpq $0, (%rax)
jle LBB9_17
## BB#6:
xorl %ebx, %ebx
movq _i_vyCYEvJNp9c9b1bD8JVdQqrQ@GOTPCREL(%rip), %r12
jmp LBB9_7
.p2align 4, 0x90
LBB9_16: ## in Loop: Header=BB9_7 Depth=1
movq (%r13), %r15
LBB9_7: ## =>This Inner Loop Header: Depth=1
movq %rbx, (%r12)
movq %r15, %rdi
callq _chckNilDisp
xorpd %xmm0, %xmm0
testq %r15, %r15
je LBB9_15
## BB#8: ## in Loop: Header=BB9_7 Depth=1
xorps %xmm1, %xmm1
cvtsi2sdq %rbx, %xmm1
movq (%r15), %rdi
cmpq _NTI_7313k2M1TRNjWfEhebZVKw_@GOTPCREL(%rip), %rdi
je LBB9_14
## BB#9: ## in Loop: Header=BB9_7 Depth=1
cmpq _NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rdi
je LBB9_14
## BB#10: ## in Loop: Header=BB9_7 Depth=1
movq _NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rax
cmpq %rax, 16(%rdi)
je LBB9_14
## BB#11: ## in Loop: Header=BB9_7 Depth=1
cmpq %rdi, _Nim_OfCheck_CACHE11(%rip)
je LBB9_15
## BB#12: ## in Loop: Header=BB9_7 Depth=1
cmpq %rdi, _Nim_OfCheck_CACHE11+8(%rip)
je LBB9_14
## BB#13: ## in Loop: Header=BB9_7 Depth=1
movq _NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rsi
leaq _Nim_OfCheck_CACHE11(%rip), %rdx
movsd %xmm1, -168(%rbp) ## 8-byte Spill
callq _isObjSlowPath_k9bdq9bQE075AR7scLFt5wIg
movsd -168(%rbp), %xmm1 ## 8-byte Reload
## xmm1 = mem[0],zero
xorpd %xmm0, %xmm0
testb %al, %al
je LBB9_15
.p2align 4, 0x90
LBB9_14: ## in Loop: Header=BB9_7 Depth=1
xorps %xmm0, %xmm0
sqrtsd %xmm1, %xmm0
LBB9_15: ## in Loop: Header=BB9_7 Depth=1
addsd (%r14), %xmm0
movsd %xmm0, (%r14)
incq %rbx
movq _n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
cmpq (%rax), %rbx
jl LBB9_16
LBB9_17:
leaq _TM_ipcYmBC9bj9a1BW35ABoB1Kw_5(%rip), %rdi
callq _copyString
sqrt assembly TestD
callq _addZCT_fCDI7oO1NNVXXURtxSzsRw
LBB9_39:
movq %rbx, (%r15)
movq _res_JV6l78E23Gc61irJB5Ow7w_3@GOTPCREL(%rip), %rbx
movq $0, (%rbx)
movq _n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
movq (%rax), %rdx
testq %rdx, %rdx
jle LBB9_50
## BB#40:
leaq -1(%rdx), %rcx
movl %edx, %eax
andl $3, %eax
cmpq $3, %rcx
jae LBB9_42
## BB#41:
xorpd %xmm0, %xmm0
xorl %ecx, %ecx
## implicit-def: %RDX
testq %rax, %rax
jne LBB9_46
jmp LBB9_49
LBB9_42:
subq %rax, %rdx
xorpd %xmm0, %xmm0
xorl %ecx, %ecx
.p2align 4, 0x90
LBB9_43: ## =>This Inner Loop Header: Depth=1
xorps %xmm1, %xmm1
cvtsi2sdq %rcx, %xmm1
sqrtsd %xmm1, %xmm1
addsd %xmm0, %xmm1
leaq 1(%rcx), %rsi
xorps %xmm0, %xmm0
cvtsi2sdq %rsi, %xmm0
sqrtsd %xmm0, %xmm0
addsd %xmm1, %xmm0
leaq 2(%rcx), %rsi
xorps %xmm1, %xmm1
cvtsi2sdq %rsi, %xmm1
sqrtsd %xmm1, %xmm1
addsd %xmm0, %xmm1
leaq 3(%rcx), %rsi
xorps %xmm0, %xmm0
cvtsi2sdq %rsi, %xmm0
sqrtsd %xmm0, %xmm0
addsd %xmm1, %xmm0
addq $4, %rcx
cmpq %rcx, %rdx
jne LBB9_43
## BB#44:
leaq -1(%rcx), %rdx
testq %rax, %rax
je LBB9_49
LBB9_46:
negq %rax
.p2align 4, 0x90
LBB9_47: ## =>This Inner Loop Header: Depth=1
xorps %xmm1, %xmm1
cvtsi2sdq %rcx, %xmm1
sqrtsd %xmm1, %xmm1
addsd %xmm1, %xmm0
incq %rcx
incq %rax
jne LBB9_47
## BB#48:
decq %rcx
movq %rcx, %rdx
LBB9_49:
movq _i_vyCYEvJNp9c9b1bD8JVdQqrQ_3@GOTPCREL(%rip), %rax
movq %rdx, (%rax)
movsd %xmm0, (%rbx)
LBB9_50:
leaq _TM_ipcYmBC9bj9a1BW35ABoB1Kw_9(%rip), %rdi
callq _copyString
Without reading your asm output in detail, I think compiler constant folding hit you. You need to construct benchmarks where the compiler is unable to do this, for example by generating random, unpredictable input or by reading the input from a file. You might want to read this article I wrote on the topic:
http://blog.johnnovak.net/2017/04/22/nim-performance-tuning-for-the-uninitiated/