The C code generated for the following example contains an expensive, unnecessary copy of a `Position` in the function `inCheck` when compiled with `nim c -d:danger -d:lto --panics:on --gc:arc --run main.nim`:
# Minimal reproducer: a plain value object handed through three funcs.
type Position* = object
# Payload is 6 + 2 uint64 values, i.e. 8 * 8 = 64 bytes.
pieces: array[6, uint64]
colors: array[2, uint64]
# Stub: ignores both arguments and always returns false.
func isAttacked*(position: Position, target: int8): bool =
false
# Stub: ignores the position and always returns 12.
func kingSquare*(position: Position): int8 =
12
# Uses the same `position` twice in one expression: once as the receiver of
# isAttacked and once as the argument of the nested kingSquare call.
func inCheck*(position: Position): bool =
position.isAttacked(position.kingSquare)
# Driver: default-initialised Position; the result is printed with echo.
var p: Position
echo p.inCheck()
Resulting C code:
// Generated C for the Nim func `isAttacked` (--gc:arc build).
// Receives the Position through a pointer; neither parameter is read.
N_LIB_PRIVATE N_NIMCALL(NIM_BOOL, isAttacked_main_4)(tyObject_Position__XJEOqi2mMysguMih68T9bQg* position, NI8 target) {
NIM_BOOL result;
// Zero-initialisation of `result`, immediately overwritten below.
result = (NIM_BOOL)0;
result = NIM_FALSE;
return result;
}
// Generated C for the Nim func `kingSquare` (--gc:arc build).
// Receives the Position through a pointer but never dereferences it.
N_LIB_PRIVATE N_NIMCALL(NI8, kingSquare_main_8)(tyObject_Position__XJEOqi2mMysguMih68T9bQg* position) {
NI8 result;
// Zero-initialisation of `result`, immediately overwritten below.
result = (NI8)0;
result = ((NI8) 12);
return result;
}
// Generated C for the Nim func `inCheck` (--gc:arc build).
// Differs from the --gc:refc output by the extra struct temporary T1_.
N_LIB_PRIVATE N_NIMCALL(NIM_BOOL, inCheck_main_11)(tyObject_Position__XJEOqi2mMysguMih68T9bQg* position) {
NIM_BOOL result;
// Full by-value temporary of the Position object.
tyObject_Position__XJEOqi2mMysguMih68T9bQg T1_;
NI8 T2_;
result = (NIM_BOOL)0;
// Whole-struct assignment: copies *position into the local temporary.
T1_ = (*position); // I believe this is the expensive copy
T2_ = (NI8)0;
// kingSquare still receives the original pointer, not the temporary.
T2_ = kingSquare_main_8(position);
// isAttacked receives the address of the copy instead of `position`.
result = isAttacked_main_4((&T1_), T2_);
return result;
}
The expensive copy is omitted when compiling with nim c -d:danger -d:lto --gc:refc --run main.nim:
// Generated C for the Nim func `isAttacked` (--gc:refc build).
// Receives the Position through a pointer; neither parameter is read.
N_LIB_PRIVATE N_NIMCALL(NIM_BOOL, isAttacked_main_4)(tyObject_Position__XJEOqi2mMysguMih68T9bQg* position, NI8 target) {
NIM_BOOL result;
// Zero-initialisation of `result`, immediately overwritten below.
result = (NIM_BOOL)0;
result = NIM_FALSE;
return result;
}
// Generated C for the Nim func `kingSquare` (--gc:refc build).
// Receives the Position through a pointer but never dereferences it.
N_LIB_PRIVATE N_NIMCALL(NI8, kingSquare_main_8)(tyObject_Position__XJEOqi2mMysguMih68T9bQg* position) {
NI8 result;
// Zero-initialisation of `result`, immediately overwritten below.
result = (NI8)0;
result = ((NI8) 12);
return result;
}
// Generated C for the Nim func `inCheck` (--gc:refc build).
// No struct temporary here: the incoming pointer is forwarded to both callees.
N_LIB_PRIVATE N_NIMCALL(NIM_BOOL, inCheck_main_11)(tyObject_Position__XJEOqi2mMysguMih68T9bQg* position) {
NIM_BOOL result;
// Only temporary is the NI8 holding kingSquare's return value.
NI8 T1_;
result = (NIM_BOOL)0;
T1_ = (NI8)0;
T1_ = kingSquare_main_8(position);
// `position` is passed through unchanged, so no Position copy is made.
result = isAttacked_main_4(position, T1_);
return result;
}
Is this a bug that can be fixed (I suspect this issue is the reason why my program is slower with --gc:arc than with the default GC), or is this inherent to the design of ARC?
Do you really think that the copy operation is still present in the final executable?
I doubt that; gcc and clang should detect this temporary variable and simply remove the copy operation. So it is better to inspect the asm code before complaining.
Wait, I modified the example. Now the assembly is (very likely) more expensive with --gc:arc, although I don't know assembly very well:
# Modified reproducer: isAttacked now actually reads from the Position.
type Position* = object
# Payload is 6 + 2 uint64 values, i.e. 8 * 8 = 64 bytes.
pieces: array[6, uint64]
colors: array[2, uint64]
# Returns whether colors[enemy] is non-zero; `target` is not used.
func isAttacked*(position: Position, enemy: int8, target: int8): bool =
position.colors[enemy] != 0
# Stub: ignores the position and always returns 12.
func kingSquare*(position: Position): int8 =
12
# Uses the same `position` twice in one expression: once as the receiver of
# isAttacked and once as the argument of the nested kingSquare call.
func inCheck*(position: Position, enemy: int8): bool =
position.isAttacked(enemy, position.kingSquare)
# Driver: default-initialised Position, queried with enemy = 1.
var p: Position
echo p.inCheck(1)
With `nim c -d:danger --passC:"-g" main.nim`, the disassembly of `inCheck` looks like this:
000000000000e510 <inCheck_main_12>:
e510: 48 0f be f6 movsx rsi,sil
e514: 48 83 7c f7 30 00 cmp QWORD PTR [rdi+rsi*8+0x30],0x0
e51a: 0f 95 c0 setne al
e51d: c3 ret
e51e: 66 90 xchg ax,ax
With `nim c -d:danger --panics:on --gc:arc --passC:"-g" main.nim`, the disassembly of `inCheck` looks like this:
0000000000005e70 <inCheck_main_12>:
5e70: 48 83 ec 58 sub rsp,0x58
5e74: f3 0f 6f 07 movdqu xmm0,XMMWORD PTR [rdi]
5e78: f3 0f 6f 4f 10 movdqu xmm1,XMMWORD PTR [rdi+0x10]
5e7d: 48 0f be f6 movsx rsi,sil
5e81: 64 48 8b 04 25 28 00 00 00 mov rax,QWORD PTR fs:0x28
5e8a: 48 89 44 24 48 mov QWORD PTR [rsp+0x48],rax
5e8f: 31 c0 xor eax,eax
5e91: f3 0f 6f 57 20 movdqu xmm2,XMMWORD PTR [rdi+0x20]
5e96: f3 0f 6f 5f 30 movdqu xmm3,XMMWORD PTR [rdi+0x30]
5e9b: 0f 29 04 24 movaps XMMWORD PTR [rsp],xmm0
5e9f: 0f 29 4c 24 10 movaps XMMWORD PTR [rsp+0x10],xmm1
5ea4: 0f 29 54 24 20 movaps XMMWORD PTR [rsp+0x20],xmm2
5ea9: 0f 29 5c 24 30 movaps XMMWORD PTR [rsp+0x30],xmm3
5eae: 48 83 7c f4 30 00 cmp QWORD PTR [rsp+rsi*8+0x30],0x0
5eb4: 0f 95 c0 setne al
5eb7: 48 8b 54 24 48 mov rdx,QWORD PTR [rsp+0x48]
5ebc: 64 48 2b 14 25 28 00 00 00 sub rdx,QWORD PTR fs:0x28
5ec5: 75 05 jne 5ecc <inCheck_main_12+0x5c>
5ec7: 48 83 c4 58 add rsp,0x58
5ecb: c3 ret
5ecc: e8 8f b1 ff ff call 1060 <__stack_chk_fail@plt>
5ed1: 66 66 2e 0f 1f 84 00 00 00 00 00 data16 cs nop WORD PTR [rax+rax*1+0x0]
5edc: 0f 1f 40 00 nop DWORD PTR [rax+0x0]