There is a bug when the refc or markAndSweep GC is combined with the MSVC compiler.
It is reported as https://github.com/nim-lang/Nim/issues/22648.
Here is the code:
import strformat, strutils
# Width that every generated code string is left-padded to (see get_all_codes).
let SIZE = 10_000_000
# Number of code strings that get_all_codes generates.
let COUNT = 99
# Binding to the variadic C `printf` declared in <stdio.h>.
proc printf*(formatstr: cstring) {.header: "<stdio.h>", importc: "printf", noSideEffect, varargs.}
proc get_all_codes(): seq[string] =
  ## Returns `COUNT` strings: the numbers 1..COUNT, each rendered as text and
  ## left-padded with '0' to a width of `SIZE` characters.
  for i in 1..COUNT:
    result.add align($i, SIZE, '0')
proc get_names(codes: seq[string]): seq[string] =
  ## Returns a new seq holding every element of `codes` prefixed with "abc",
  ## in the same order. An empty input yields an empty result.
  for code in codes:
    result.add "abc" & code
proc check_codes() {.exportc.} =
  ## Reproduction driver: builds the `codes` and `names` seqs, then walks both
  ## in lockstep, checking that every string still has its expected length
  ## while `lines` keeps allocating new formatted strings.
  ## Prints "oops ..." and quits as soon as a length mismatch is seen;
  ## otherwise prints the number of lines built.
  var lines: seq[string]
  var codes = get_all_codes()
  var names = get_names(codes)
  # cell_to_check = cast[int](names) - 16 # a variable I added to record this pointer so the GC can report when it is freed
  # NOTE(review): the 16-byte offset presumably steps back from the seq payload
  # to its GC cell header -- confirm against the runtime in use.
  printf "--------- start names pointer is %p\n", cast[int](names) - 16
  let L = codes.len
  for i in 0..<L:
    var name = names[i]
    var code = codes[i]
    printf "Iterations %d code.len = %d name.len = %d\n", i+1, code.len, name.len
    # This should never happen: each code is SIZE characters long and each
    # name is "abc" plus its code.
    if code.len != SIZE or name.len != SIZE + 3:
      printf "oops %d %d %d", i+1, code.len, name.len
      quit()
    # Allocates a new large string on every iteration.
    lines.add fmt"{code}:{name}"
  printf "ok %d\n", lines.len
when isMainModule:
  # Run the reproduction only when this file is compiled as the main module.
  check_codes()
This is the program's output when compiled with MSVC and -d:release:
--------- start names pointer is 000001A5D6390460
Iterations 1 code.len = 10000000 name.len = 10000003
Iterations 2 code.len = 10000000 name.len = 10000003
Iterations 3 code.len = 10000000 name.len = 10000003
Iterations 4 code.len = 10000000 name.len = 10000003
--------- dealloc 000001A5D6390460
Iterations 5 code.len = 10000000 name.len = 10000003
Iterations 6 code.len = 10000000 name.len = 10000000
oops 6 10000000 10000000
The memory of the variable names is deallocated during iteration 4.
The cause of the bug is simple, but I don't know how to fix it.
The refc and markAndSweep collectors scan the stack and the registers to check whether any stack slot or register holds a reference to an allocated local variable. Normally this method is simple and causes no problems. But when MSVC optimizes the code above, no stack slot or register holds a reference to the variable names itself; there is only a reference to the address names->data. So, during the mark-and-sweep stage, the memory of the variable names is deallocated.
Here is the assembler code:
check_codes LABEL NEAR
; Note: Prefix bit or byte has no meaning in this context
push rbx ; 0000 _ 40: 53
push rbp ; 0002 _ 55
push rdi ; 0003 _ 57
push r12 ; 0004 _ 41: 54
push r13 ; 0006 _ 41: 55
sub rsp, 48 ; 0008 _ 48: 83. EC, 30
xor ebx, ebx ; 000C _ 33. DB
mov qword ptr [rsp+68H], rbx ; 000E _ 48: 89. 5C 24, 68
call get_all_codes__bug5050545256_u8 ; 0013 _ E8, 00000000(rel)
mov rcx, rax ; 0018 _ 48: 8B. C8
mov rdi, rax ; 001B _ 48: 8B. F8
call get_names__bug5050545256_u19 ; 001E _ E8, 00000000(rel)
lea rcx, ptr [??_C@_0CF@CADBCMPJ@?9?9?9?9?9?9?9?9?9?5start?5names?5pointer?5is@]; 0023 _ 48: 8D. 0D, 00000000(rel)
mov r13, rax ; 002A _ 4C: 8B. E8 # r13 save the names
lea rdx, ptr [rax-10H] ; 002D _ 48: 8D. 50, F0
mov qword ptr [cell_to_check__system_u5847], rdx; 0031 _ 48: 89. 15, 00000000(rel)
call qword ptr [__imp_printf] ; 0038 _ FF. 15, 00000000(rel)
test rdi, rdi ; 003E _ 48: 85. FF
jz ?_006 ; 0041 _ 74, 05
mov r12, qword ptr [rdi] ; 0043 _ 4C: 8B. 27
jmp ?_007 ; 0046 _ EB, 03
?_006: mov r12, rbx ; 0048 _ 4C: 8B. E3
?_007: mov qword ptr [rsp+70H], rsi ; 004B _ 48: 89. 74 24, 70
mov rbp, rbx ; 0050 _ 48: 8B. EB
mov qword ptr [rsp+28H], r14 ; 0053 _ 4C: 89. 74 24, 28
mov qword ptr [rsp+20H], r15 ; 0058 _ 4C: 89. 7C 24, 20
test r12, r12 ; 005D _ 4D: 85. E4
jle ?_019 ; 0060 _ 0F 8E, 0000015C
lea r14, ptr [rdi+10H] ; 0066 _ 4C: 8D. 77, 10
sub r13, rdi ; 006A _ 4C: 2B. EF # r13 is no longer point to names
; Filling space: 3H
; Filler type: Multi-byte NOP
; db 0FH, 1FH, 00H
ALIGN 8
?_008: mov rcx, qword ptr [r14+r13] ; 0070 _ 4B: 8B. 0C 2E