nimforum mirror - Is the overriding method not optimized?

bazi (original) [2018-07-13T12:23:38+02:00] view original

Should I avoid overriding methods whenever possible?

from times import epochtime
from strutils import format

type
  # Three-level hierarchy A <- B <- C; the methods below define `p` for every level.
  TestA = ref object of RootObj
  TestB = ref object of TestA
  TestC = ref object of TestB
  
  # Three-level hierarchy D <- E <- F; the methods below define `p` only for TestD.
  TestD = ref object of RootObj
  TestE = ref object of TestD
  TestF = ref object of TestE

# A/B/C hierarchy: a base method on TestA plus an override for TestB and TestC.
# Every body is the same (x+2), so the variants differ only in the receiver type.
method p (self:TestA, x:int) :int {.base.} = return x+2
method p (self:TestB, x:int) :int = return x+2
method p (self:TestC, x:int) :int = return x+2
# D hierarchy: a single base method on TestD; no override is declared in this snippet.
method p (self:TestD, x:int) :int {.base.} = return x+2

# Benchmark helper: evaluates `obj` once, calls `p(i)` on it for every i in 0..<n,
# sums the returned values and echoes `title`, the sum and the elapsed time.
template bench (n:int, title:string, obj:untyped) :untyped =
  # The clock is read before `obj` is evaluated, so evaluating `obj` is part of the measured time.
  let startTime = epochtime()
  let tst = obj
  var res = 0
  for i in 0..<n : res += tst.p(i)
  # The epochtime() difference is scaled by 1_000_000_000 and converted to int for the "ns" figure.
  echo "$# result:$# time:$# ns".format( title, res, int((epochtime() - startTime) * 1_000_000_000) )

# Driver: run `bench` with one billion iterations on a fresh instance of TestA, TestB, TestC and TestD.
let n = 1_000_000_000
echo n, " times"
# The expression after the colon is passed to `bench` as its `obj` argument.
n.bench("A") : TestA()
n.bench("B") : TestB()
n.bench("C") : TestC()
n.bench("D") : TestD()

result


1000000000 times
A result:500000001500000000 time:5792839050 ns
B result:500000001500000000 time:4851602077 ns
C result:500000001500000000 time:4816570043 ns
D result:500000001500000000 time:1192 ns

nimcache (clang -c -w -O3 )

/* Generated implementation of `p` taking a TestA object pointer: returns x + 2.
   `self` is not read in the body. */
N_LIB_PRIVATE N_NIMCALL(NI, p_Aw3lesFExZt7MqrTSldVkA)(tyObject_TestAcolonObjectType__my7mzkOUxsMZjO5v1YyvCg* self, NI x) {
        NI result;
{	result = (NI)0;
        result = (NI)(x + ((NI) 2));
        goto BeforeRet_;
        }BeforeRet_: ;
        return result;
}

/* Generated implementation of `p` taking a TestB object pointer: returns x + 2.
   `self` is not read in the body. */
N_LIB_PRIVATE N_NIMCALL(NI, p_dm2n08nTnaT9czYrAnBLG9cQ)(tyObject_TestBcolonObjectType__9bi1P9aySy82QHeC9aSv1GsBw* self, NI x) {
        NI result;
{	result = (NI)0;
        result = (NI)(x + ((NI) 2));
        goto BeforeRet_;
        }BeforeRet_: ;
        return result;
}

/* Generated implementation of `p` taking a TestC object pointer: returns x + 2.
   `self` is not read in the body. */
N_LIB_PRIVATE N_NIMCALL(NI, p_T9bkbRDmlSvDBsMMNFJRmdg)(tyObject_TestCcolonObjectType__agr0Oxf9aZJtF1PQMpDWW8Q* self, NI x) {
        NI result;
{	result = (NI)0;
        result = (NI)(x + ((NI) 2));
        goto BeforeRet_;
        }BeforeRet_: ;
        return result;
}

/* Generated implementation of `p` taking a TestD object pointer: returns x + 2.
   `self` is not read in the body. */
N_LIB_PRIVATE N_NIMCALL(NI, p_7IeQfxWq7IzSKT9biOTaLAA)(tyObject_TestDcolonObjectType__QU8t7UNc2UQQ3cCM9bHpnHw* self, NI x) {
        NI result;
{	result = (NI)0;
        result = (NI)(x + ((NI) 2));
        goto BeforeRet_;
        }BeforeRet_: ;
        return result;
}

/* Generated dispatcher for `p` on a TestA-typed receiver.
   After chckNilDisp(self) it tests the object's m_type in three steps, most-derived type first,
   and forwards to the first implementation whose test succeeds:
     1. TestC - direct pointer comparison against TestC's type info;
     2. TestB - isObjWithCache() against TestB's type info;
     3. TestA - isObjWithCache() against TestA's type info.
   If no test succeeds, the initial result of 0 is returned. */
N_LIB_PRIVATE N_NIMCALL(NI, p_i5lHfL9bd85EwAlh0eEVYOQ)(tyObject_TestAcolonObjectType__my7mzkOUxsMZjO5v1YyvCg* self, NI x) {
        NI result;
{	result = (NI)0;
        chckNilDisp(self);
        {
                /* Step 1: is the runtime type exactly TestC's type info? */
                if (!((self) && ((*self).Sup.m_type == (&NTI_agr0Oxf9aZJtF1PQMpDWW8Q_)))) goto LA3_;
                result = p_T9bkbRDmlSvDBsMMNFJRmdg(((tyObject_TestCcolonObjectType__agr0Oxf9aZJtF1PQMpDWW8Q*) (self)), x);
                goto BeforeRet_;
        }
        goto LA1_;
        LA3_: ;
        {
                /* Step 2: cached type check against TestB's type info. */
                if (!((self) && (isObjWithCache((*self).Sup.m_type, (&NTI_9bi1P9aySy82QHeC9aSv1GsBw_), Nim_OfCheck_CACHE13)))) goto LA6_;
                result = p_dm2n08nTnaT9czYrAnBLG9cQ(((tyObject_TestBcolonObjectType__9bi1P9aySy82QHeC9aSv1GsBw*) (self)), x);
                goto BeforeRet_;
        }
        goto LA1_;
        LA6_: ;
        {
                /* Step 3: cached type check against TestA's type info. */
                if (!((self) && (isObjWithCache((*self).Sup.m_type, (&NTI_my7mzkOUxsMZjO5v1YyvCg_), Nim_OfCheck_CACHE14)))) goto LA9_;
                result = p_Aw3lesFExZt7MqrTSldVkA(self, x);
                goto BeforeRet_;
        }
        goto LA1_;
        LA9_: ;
        LA1_: ;
        }BeforeRet_: ;
        return result;
}

/* Generated dispatcher for `p` on a TestD-typed receiver.
   It contains no nil check and no runtime type test: it forwards straight to the single
   TestD implementation. */
N_LIB_PRIVATE N_NIMCALL(NI, p_kWP9alFwS9aDbbtge4Kk9cdQA)(tyObject_TestDcolonObjectType__QU8t7UNc2UQQ3cCM9bHpnHw* self, NI x) {
        NI result;
{	result = (NI)0;
        result = p_7IeQfxWq7IzSKT9biOTaLAA(self, x);
        goto BeforeRet_;
        }BeforeRet_: ;
        return result;
}

bazi (original) [2018-07-13T13:06:41+02:00] view original

add

# Extended driver: same runs as before, plus TestE and TestF.
# No `p` is declared for TestE or TestF in the first snippet; only TestD has one in that hierarchy.
let n = 1_000_000_000
echo n, " times"
n.bench("A") : TestA()
n.bench("B") : TestB()
n.bench("C") : TestC()
n.bench("D") : TestD()
n.bench("E") : TestE()
n.bench("F") : TestF()

result


1000000000 times
A result:500000001500000000 time:6607109069 ns
B result:500000001500000000 time:5303513050 ns
C result:500000001500000000 time:5023578166 ns
D result:500000001500000000 time:0 ns
E result:500000001500000000 time:0 ns
F result:500000001500000000 time:953 ns

Araq (original) [2018-07-13T15:14:44+02:00] view original

Don't look at the C code. Look at the produced assembler code.

bazi (original) [2018-07-14T14:06:14+02:00] view original

I examined the generated assembly; it seems that, even in a very simple case like this one, the optimization is not applied when the method is overridden.

However, I concluded that there is no need to worry about it too much, except when performing a very large number of very simple calculations, where the version without an override can be optimized heavily.

For example, with sqrt there seems to be much less of a difference.

thanks

simple x2 nim

from times import epochtime
from strutils import format
from math import sqrt

type
  # TestB derives from TestA; TestD is a separate root with no subtype in this script.
  TestA = ref object of RootObj
  TestB = ref object of TestA
  TestD = ref object of RootObj

# Same body (x*2) everywhere: TestA's `p` is overridden for TestB, TestD's `p` has no override.
method p (self:TestA, x:int) :int {.base.} = return x*2
method p (self:TestB, x:int) :int = return x*2
method p (self:TestD, x:int) :int {.base.} = return x*2

# Benchmark helper: evaluates `obj` once, calls `p(i)` on it for every i in 0..<n,
# sums the returned values and echoes `title`, the sum and the elapsed time.
template bench (n:int, title:string, obj:untyped) :untyped =
  # The clock is read before `obj` is evaluated, so evaluating `obj` is part of the measured time.
  let startTime = epochtime()
  let tst = obj
  var res = 0
  for i in 0..<n : res += tst.p(i)
  # The epochtime() difference is scaled by 1_000_000_000 and converted to int for the "ns" figure.
  echo "$# result:$# time:$# ns".format( title, res, int((epochtime() - startTime) * 1_000_000_000) )

# Driver: one billion iterations on a fresh instance of TestA, TestB and TestD.
let n = 1_000_000_000
echo n, " times"
n.bench("A") : TestA()
n.bench("B") : TestB()
n.bench("D") : TestD()

simple x2 result


1000000000 times
A result:999999999000000000 time:4097547054 ns
B result:999999999000000000 time:3608579874 ns
D result:999999999000000000 time:953 ns

simple x2 assembly TestA


        callq	_addZCT_fCDI7oO1NNVXXURtxSzsRw
LBB9_5:
        movq	_tst_doe6pEB1jyWn6yLs773CEw@GOTPCREL(%rip), %rax
        movq	%rbx, (%rax)
        movq	_res_JV6l78E23Gc61irJB5Ow7w@GOTPCREL(%rip), %rax
        movq	$0, (%rax)
        movq	_n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
        cmpq	$0, (%rax)
        jle	LBB9_21
## BB#6:
        movl	$1, %r15d
        xorl	%r12d, %r12d
        movq	_i_vyCYEvJNp9c9b1bD8JVdQqrQ@GOTPCREL(%rip), %r13
        xorl	%r14d, %r14d
        jmp	LBB9_8
        .p2align	4, 0x90
LBB9_7:                                 ##   in Loop: Header=BB9_8 Depth=1
        incq	%r14
        movq	_tst_doe6pEB1jyWn6yLs773CEw@GOTPCREL(%rip), %rax
        movq	(%rax), %rbx
        incq	%r15
        addq	$2, %r12
LBB9_8:                                 ## =>This Inner Loop Header: Depth=1
        leaq	-1(%r15), %rax
        movq	%rax, (%r13)
        movq	%rbx, %rdi
        callq	_chckNilDisp
        testq	%rbx, %rbx
        je	LBB9_16
## BB#9:                                ##   in Loop: Header=BB9_8 Depth=1
        movq	(%rbx), %rdi
        cmpq	_NTI_7313k2M1TRNjWfEhebZVKw_@GOTPCREL(%rip), %rdi
        je	LBB9_17
## BB#10:                               ##   in Loop: Header=BB9_8 Depth=1
        cmpq	_NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rdi
        je	LBB9_15
## BB#11:                               ##   in Loop: Header=BB9_8 Depth=1
        movq	_NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rax
        cmpq	%rax, 16(%rdi)
        je	LBB9_15
## BB#12:                               ##   in Loop: Header=BB9_8 Depth=1
        cmpq	%rdi, _Nim_OfCheck_CACHE11(%rip)
        je	LBB9_16
## BB#13:                               ##   in Loop: Header=BB9_8 Depth=1
        cmpq	%rdi, _Nim_OfCheck_CACHE11+8(%rip)
        je	LBB9_15
## BB#14:                               ##   in Loop: Header=BB9_8 Depth=1
        movq	_NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rsi
        leaq	_Nim_OfCheck_CACHE11(%rip), %rdx
        callq	_isObjSlowPath_k9bdq9bQE075AR7scLFt5wIg
        testb	%al, %al
        je	LBB9_16
        .p2align	4, 0x90
LBB9_15:                                ##   in Loop: Header=BB9_8 Depth=1
        movq	%r12, %rax
        jmp	LBB9_18
        .p2align	4, 0x90
LBB9_16:                                ##   in Loop: Header=BB9_8 Depth=1
        xorl	%eax, %eax
        jmp	LBB9_18
        .p2align	4, 0x90
LBB9_17:                                ##   in Loop: Header=BB9_8 Depth=1
        leaq	(%r14,%r14), %rax
LBB9_18:                                ##   in Loop: Header=BB9_8 Depth=1
        movq	_res_JV6l78E23Gc61irJB5Ow7w@GOTPCREL(%rip), %rcx
        addq	%rax, (%rcx)
        movq	_n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
        cmpq	(%rax), %r15
        jl	LBB9_7
LBB9_21:
        leaq	_TM_ipcYmBC9bj9a1BW35ABoB1Kw_5(%rip), %rdi
        callq	_copyString

simple x2 assembly TestD


        callq	_addZCT_fCDI7oO1NNVXXURtxSzsRw
LBB9_47:
        movq	%rbx, (%r15)
        movq	_res_JV6l78E23Gc61irJB5Ow7w_3@GOTPCREL(%rip), %rbx
        movq	$0, (%rbx)
        movq	_n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
        movq	(%rax), %rax
        testq	%rax, %rax
        jle	LBB9_49
## BB#48:
        leaq	-1(%rax), %rcx
        leaq	-2(%rax), %rdx
        imulq	%rcx, %rdx
        andq	$-2, %rdx
        leaq	-2(%rdx,%rax,2), %rax
        movq	_i_vyCYEvJNp9c9b1bD8JVdQqrQ_3@GOTPCREL(%rip), %rdx
        movq	%rcx, (%rdx)
        movq	%rax, (%rbx)
LBB9_49:
        leaq	_TM_ipcYmBC9bj9a1BW35ABoB1Kw_9(%rip), %rdi
        callq	_copyString

sqrt nim

from times import epochtime
from strutils import format
from math import sqrt

type
  # TestB derives from TestA; TestD is a separate root with no subtype in this script.
  TestA = ref object of RootObj
  TestB = ref object of TestA
  TestD = ref object of RootObj

# Same body (x.sqrt) everywhere: TestA's `p` is overridden for TestB, TestD's `p` has no override.
method p (self:TestA, x:float64) :float64 {.base.} = return x.sqrt
method p (self:TestB, x:float64) :float64 = return x.sqrt
method p (self:TestD, x:float64) :float64 {.base.} = return x.sqrt

# Benchmark helper, float variant: evaluates `obj` once, calls `p(float64(i))` on it for every
# i in 0..<n, accumulates the results in a float sum and echoes `title`, the sum and the elapsed time.
template bench (n:int, title:string, obj:untyped) :untyped =
  # The clock is read before `obj` is evaluated, so evaluating `obj` is part of the measured time.
  let startTime = epochtime()
  let tst = obj
  var res = 0.0
  for i in 0..<n : res += tst.p(float64(i))
  # The epochtime() difference is scaled by 1_000_000_000 and converted to int for the "ns" figure.
  echo "$# result:$# time:$# ns".format( title, res, int((epochtime() - startTime) * 1_000_000_000) )

# Driver: one billion iterations on a fresh instance of TestA, TestB and TestD.
let n = 1_000_000_000
echo n, " times"
n.bench("A") : TestA()
n.bench("B") : TestB()
n.bench("D") : TestD()

sqrt result


1000000000 times
A result:21081851051977.78 time:4417852163 ns
B result:21081851051977.78 time:4571517229 ns
D result:21081851051977.78 time:4466888189 ns

sqrt assembly TestA


        callq	_addZCT_fCDI7oO1NNVXXURtxSzsRw
LBB9_5:
        movq	%r15, (%r13)
        movq	_res_JV6l78E23Gc61irJB5Ow7w@GOTPCREL(%rip), %r14
        movq	$0, (%r14)
        movq	_n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
        cmpq	$0, (%rax)
        jle	LBB9_17
## BB#6:
        xorl	%ebx, %ebx
        movq	_i_vyCYEvJNp9c9b1bD8JVdQqrQ@GOTPCREL(%rip), %r12
        jmp	LBB9_7
        .p2align	4, 0x90
LBB9_16:                                ##   in Loop: Header=BB9_7 Depth=1
        movq	(%r13), %r15
LBB9_7:                                 ## =>This Inner Loop Header: Depth=1
        movq	%rbx, (%r12)
        movq	%r15, %rdi
        callq	_chckNilDisp
        xorpd	%xmm0, %xmm0
        testq	%r15, %r15
        je	LBB9_15
## BB#8:                                ##   in Loop: Header=BB9_7 Depth=1
        xorps	%xmm1, %xmm1
        cvtsi2sdq	%rbx, %xmm1
        movq	(%r15), %rdi
        cmpq	_NTI_7313k2M1TRNjWfEhebZVKw_@GOTPCREL(%rip), %rdi
        je	LBB9_14
## BB#9:                                ##   in Loop: Header=BB9_7 Depth=1
        cmpq	_NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rdi
        je	LBB9_14
## BB#10:                               ##   in Loop: Header=BB9_7 Depth=1
        movq	_NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rax
        cmpq	%rax, 16(%rdi)
        je	LBB9_14
## BB#11:                               ##   in Loop: Header=BB9_7 Depth=1
        cmpq	%rdi, _Nim_OfCheck_CACHE11(%rip)
        je	LBB9_15
## BB#12:                               ##   in Loop: Header=BB9_7 Depth=1
        cmpq	%rdi, _Nim_OfCheck_CACHE11+8(%rip)
        je	LBB9_14
## BB#13:                               ##   in Loop: Header=BB9_7 Depth=1
        movq	_NTI_DIg9am9aowJjDY2gX3Qna9b4g_@GOTPCREL(%rip), %rsi
        leaq	_Nim_OfCheck_CACHE11(%rip), %rdx
        movsd	%xmm1, -168(%rbp)       ## 8-byte Spill
        callq	_isObjSlowPath_k9bdq9bQE075AR7scLFt5wIg
        movsd	-168(%rbp), %xmm1       ## 8-byte Reload
                                        ## xmm1 = mem[0],zero
        xorpd	%xmm0, %xmm0
        testb	%al, %al
        je	LBB9_15
        .p2align	4, 0x90
LBB9_14:                                ##   in Loop: Header=BB9_7 Depth=1
        xorps	%xmm0, %xmm0
        sqrtsd	%xmm1, %xmm0
LBB9_15:                                ##   in Loop: Header=BB9_7 Depth=1
        addsd	(%r14), %xmm0
        movsd	%xmm0, (%r14)
        incq	%rbx
        movq	_n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
        cmpq	(%rax), %rbx
        jl	LBB9_16
LBB9_17:
        leaq	_TM_ipcYmBC9bj9a1BW35ABoB1Kw_5(%rip), %rdi
        callq	_copyString

sqrt assembly TestD


        callq	_addZCT_fCDI7oO1NNVXXURtxSzsRw
LBB9_39:
        movq	%rbx, (%r15)
        movq	_res_JV6l78E23Gc61irJB5Ow7w_3@GOTPCREL(%rip), %rbx
        movq	$0, (%rbx)
        movq	_n_Ny5Ou9b9c0CyXz9a1MCN32XWw@GOTPCREL(%rip), %rax
        movq	(%rax), %rdx
        testq	%rdx, %rdx
        jle	LBB9_50
## BB#40:
        leaq	-1(%rdx), %rcx
        movl	%edx, %eax
        andl	$3, %eax
        cmpq	$3, %rcx
        jae	LBB9_42
## BB#41:
        xorpd	%xmm0, %xmm0
        xorl	%ecx, %ecx
                                        ## implicit-def: %RDX
        testq	%rax, %rax
        jne	LBB9_46
        jmp	LBB9_49
LBB9_42:
        subq	%rax, %rdx
        xorpd	%xmm0, %xmm0
        xorl	%ecx, %ecx
        .p2align	4, 0x90
LBB9_43:                                ## =>This Inner Loop Header: Depth=1
        xorps	%xmm1, %xmm1
        cvtsi2sdq	%rcx, %xmm1
        sqrtsd	%xmm1, %xmm1
        addsd	%xmm0, %xmm1
        leaq	1(%rcx), %rsi
        xorps	%xmm0, %xmm0
        cvtsi2sdq	%rsi, %xmm0
        sqrtsd	%xmm0, %xmm0
        addsd	%xmm1, %xmm0
        leaq	2(%rcx), %rsi
        xorps	%xmm1, %xmm1
        cvtsi2sdq	%rsi, %xmm1
        sqrtsd	%xmm1, %xmm1
        addsd	%xmm0, %xmm1
        leaq	3(%rcx), %rsi
        xorps	%xmm0, %xmm0
        cvtsi2sdq	%rsi, %xmm0
        sqrtsd	%xmm0, %xmm0
        addsd	%xmm1, %xmm0
        addq	$4, %rcx
        cmpq	%rcx, %rdx
        jne	LBB9_43
## BB#44:
        leaq	-1(%rcx), %rdx
        testq	%rax, %rax
        je	LBB9_49
LBB9_46:
        negq	%rax
        .p2align	4, 0x90
LBB9_47:                                ## =>This Inner Loop Header: Depth=1
        xorps	%xmm1, %xmm1
        cvtsi2sdq	%rcx, %xmm1
        sqrtsd	%xmm1, %xmm1
        addsd	%xmm1, %xmm0
        incq	%rcx
        incq	%rax
        jne	LBB9_47
## BB#48:
        decq	%rcx
        movq	%rcx, %rdx
LBB9_49:
        movq	_i_vyCYEvJNp9c9b1bD8JVdQqrQ_3@GOTPCREL(%rip), %rax
        movq	%rdx, (%rax)
        movsd	%xmm0, (%rbx)
LBB9_50:
        leaq	_TM_ipcYmBC9bj9a1BW35ABoB1Kw_9(%rip), %rdi
        callq	_copyString

JohnNovak (original) [2018-07-17T23:29:32+02:00] view original

Without reading your asm output in detail, I think compiler constant folding hit you. You need to manufacture benchmarks where the compiler is unable to do this. E.g. generating random unpredictable input or reading the input from a file. You might want to read this article I wrote on the topic:

http://blog.johnnovak.net/2017/04/22/nim-performance-tuning-for-the-uninitiated/

Mirror of forum.nim-lang.org

4045 :: Is the overriding method not optimized?