This short-string-optimized, copy-on-write string implementation seems to be very good:
const
AlwaysAvail = 7
PayloadSize = AlwaysAvail + sizeof(pointer)
proc atomicAddFetch(p: var int; v: int): int {.importc: "__sync_add_and_fetch", nodecl.}
proc atomicSubFetch(p: var int; v: int): int {.importc: "__sync_sub_and_fetch", nodecl.}
type
LongString = object
rc: int # atomic reference count; 1 = unique owner
fullLen: int
capImpl: int # bit 0: heap-allocated; upper bits: capacity (cap = capImpl shr 1)
data: UncheckedArray[char]
SmallString = object
slen: byte # when > PayloadSize, `more` is valid ptr
payload: array[AlwaysAvail, char]
more: ptr LongString # when long: pointer; when small (len 8..15): bytes 7..14 stored here
proc `=destroy`*(s: var SmallString) =
if int(s.slen) > PayloadSize and (s.more.capImpl and 1) == 1:
if atomicSubFetch(s.more.rc, 1) == 0:
dealloc(s.more)
proc `=wasMoved`*(s: var SmallString) {.inline.} =
s.slen = 0
proc `=sink`*(dst: var SmallString; src: SmallString) =
`=destroy`(dst)
copyMem(addr dst, unsafeAddr src, sizeof(SmallString))
proc `=copy`*(dst: var SmallString; src: SmallString) =
if int(src.slen) <= PayloadSize:
`=destroy`(dst) # dst may have been a long string
copyMem(addr dst, unsafeAddr src, sizeof(SmallString))
else:
if addr(dst) == unsafeAddr(src): return
`=destroy`(dst)
# COW: share the block, bump refcount — no allocation needed
if (src.more.capImpl and 1) == 1:
discard atomicAddFetch(src.more.rc, 1)
copyMem(addr dst, unsafeAddr src, sizeof(SmallString))
proc `=dup`*(src: SmallString): SmallString =
copyMem(addr result, unsafeAddr src, sizeof(SmallString))
if int(src.slen) > PayloadSize and (src.more.capImpl and 1) == 1:
discard atomicAddFetch(src.more.rc, 1)
proc ensureUniqueLong(s: var SmallString; oldLen, newLen: int) =
# Ensure s.more is a unique (rc=1) heap block with capacity >= newLen, preserving existing data.
# s must already be a long string on entry.
let heapAlloc = (s.more.capImpl and 1) == 1
let unique = heapAlloc and s.more.rc == 1
let cap = s.more.capImpl shr 1
if unique and newLen <= cap:
s.more.fullLen = newLen
else:
let newCap = max(newLen, oldLen * 2)
let p = cast[ptr LongString](alloc(sizeof(int) * 3 + newCap + 1))
p.rc = 1
p.fullLen = newLen
p.capImpl = (newCap shl 1) or 1
let old = s.more
copyMem(addr p.data[0], addr old.data[0], oldLen)
if heapAlloc and atomicSubFetch(old.rc, 1) == 0:
dealloc(old)
s.more = p
proc len*(s: SmallString): int {.inline.} =
result = int s.slen
if result > PayloadSize:
result = s.more.fullLen
template guts(s: SmallString): (int, ptr UncheckedArray[char]) =
let slen = int s.slen
if slen > PayloadSize:
(s.more.fullLen, cast[ptr UncheckedArray[char]](addr s.more.data[0]))
else:
(slen, cast[ptr UncheckedArray[char]](addr s.payload[0]))
proc `[]`*(s: SmallString; i: int): char {.inline.} =
let (l, p) = s.guts
assert i < l
result = p[i]
proc `[]=`*(s: var SmallString; i: int; c: char) =
let slen = int s.slen
if slen <= PayloadSize:
# unchecked: when i >= 7 we store into the `more` overlay
(cast[ptr UncheckedArray[char]](addr s.payload[0]))[i] = c
else:
let l = s.more.fullLen
ensureUniqueLong(s, l, l) # COW if shared; length unchanged
s.more.data[i] = c
if i < AlwaysAvail:
s.payload[i] = c
proc cmp*(a, b: SmallString): int =
# Use slen directly for prefix length: for short/medium it is the real length,
# for long it is the sentinel (> AlwaysAvail), so min(..., AlwaysAvail) still gives 7.
# This avoids dereferencing `more` before the prefix comparison.
let pfxLen = min(min(int a.slen, int b.slen), AlwaysAvail)
result = cmpMem(unsafeAddr a.payload[0], unsafeAddr b.payload[0], pfxLen)
if result != 0: return
# Prefix matched — now fetch actual lengths (dereferences `more` only if long)
let la = if int(a.slen) > PayloadSize: a.more.fullLen else: int(a.slen)
let lb = if int(b.slen) > PayloadSize: b.more.fullLen else: int(b.slen)
let minLen = min(la, lb)
if minLen <= AlwaysAvail:
result = la - lb
return
let (_, pa) = a.guts
let (_, pb) = b.guts
result = cmpMem(addr pa[AlwaysAvail], addr pb[AlwaysAvail], minLen - AlwaysAvail)
if result == 0:
result = la - lb
proc `==`*(a, b: SmallString): bool =
if a.slen != b.slen: return false
# slen equal: for short/medium this means equal lengths; for long (both sentinel) we still need fullLen.
let slen = int(a.slen)
let pfxLen = min(slen, AlwaysAvail)
if cmpMem(unsafeAddr a.payload[0], unsafeAddr b.payload[0], pfxLen) != 0: return false
if slen <= AlwaysAvail: return true
if slen <= PayloadSize:
# medium: guts gives the UncheckedArray without a heap dereference
let (la, pa) = a.guts
let (_, pb) = b.guts
return cmpMem(addr pa[pfxLen], addr pb[pfxLen], la - pfxLen) == 0
# long: fetch actual lengths only after prefix matched
let la = a.more.fullLen
if la != b.more.fullLen: return false
cmpMem(addr a.more.data[pfxLen], addr b.more.data[pfxLen], la - pfxLen) == 0
proc `<=`*(a, b: SmallString): bool {.inline.} = cmp(a, b) <= 0
proc continuesWith*(s, sub: SmallString; start: int): bool =
let subLen = sub.len
if start < 0 or start + subLen > s.len: return false
if subLen == 0: return true
# How many bytes starting at `start` are in s's inline payload?
let pfxLen = min(subLen, max(0, AlwaysAvail - start))
if pfxLen > 0:
# sub.payload always holds sub's first AlwaysAvail bytes, so pfxLen <= AlwaysAvail is fine
if cmpMem(unsafeAddr s.payload[start], unsafeAddr sub.payload[0], pfxLen) != 0:
return false
if pfxLen == subLen: return true
# Prefix matched or start >= AlwaysAvail; fall back to full data
let (_, sp) = s.guts
let (_, subp) = sub.guts
cmpMem(addr sp[start + pfxLen], addr subp[pfxLen], subLen - pfxLen) == 0
proc startsWith*(s, sub: SmallString): bool {.inline.} = continuesWith(s, sub, 0)
proc endsWith*(s, sub: SmallString): bool {.inline.} = continuesWith(s, sub, s.len - sub.len)
proc add*(s: var SmallString; c: char) =
let slen = int(s.slen)
if slen <= PayloadSize:
let newLen = slen + 1
if newLen <= PayloadSize:
cast[ptr UncheckedArray[char]](addr s.payload[0])[slen] = c
s.slen = byte(newLen)
else:
# transition from medium (slen == PayloadSize) to long
let cap = newLen * 2
let p = cast[ptr LongString](alloc(sizeof(int) * 3 + cap + 1))
p.rc = 1
p.fullLen = newLen
p.capImpl = (cap shl 1) or 1
copyMem(addr p.data[0], cast[ptr UncheckedArray[char]](addr s.payload[0]), slen)
p.data[slen] = c
# payload[0..AlwaysAvail-1] already correct; slen >= AlwaysAvail so no update needed
s.more = p
s.slen = byte(PayloadSize + 1)
else:
let l = s.more.fullLen # fetch fullLen only in the long path
ensureUniqueLong(s, l, l + 1)
s.more.data[l] = c
# l >= PayloadSize > AlwaysAvail, so prefix is unaffected
proc add*(s: var SmallString; t: SmallString) =
let slen = int(s.slen)
let (tl, tp) = t.guts # fetch t's guts before any mutation (aliasing safety)
if tl == 0: return
if slen <= PayloadSize:
let sl = slen # for short/medium, slen IS the actual length
let newLen = sl + tl
if newLen <= PayloadSize:
copyMem(addr cast[ptr UncheckedArray[char]](addr s.payload[0])[sl], tp, tl)
s.slen = byte(newLen)
else:
# transition to long
let cap = newLen * 2
let p = cast[ptr LongString](alloc(sizeof(int) * 3 + cap + 1))
p.rc = 1
p.fullLen = newLen
p.capImpl = (cap shl 1) or 1
copyMem(addr p.data[0], cast[ptr UncheckedArray[char]](addr s.payload[0]), sl)
copyMem(addr p.data[sl], tp, tl)
# update prefix bytes that come from t (only when sl < AlwaysAvail)
if sl < AlwaysAvail:
copyMem(addr s.payload[sl], tp, min(AlwaysAvail - sl, tl))
s.more = p
s.slen = byte(PayloadSize + 1)
else:
let sl = s.more.fullLen # fetch fullLen only in the long path
let newLen = sl + tl
# tp was read before ensureUniqueLong: if t.more == s.more, rc decrements but won't hit 0
ensureUniqueLong(s, sl, newLen)
copyMem(addr s.more.data[sl], tp, tl)
# sl >= PayloadSize > AlwaysAvail, so prefix is unaffected
proc `&`*(a, b: SmallString): SmallString =
result = a
result.add(b)
proc toSmallString*(s: openArray[char]): SmallString =
let l = s.len
if l == 0: return
if l <= PayloadSize:
result.slen = byte(l)
copyMem(cast[ptr UncheckedArray[char]](addr result.payload[0]), unsafeAddr s[0], l)
else:
let p = cast[ptr LongString](alloc(sizeof(int) * 3 + l + 1))
p.rc = 1
p.fullLen = l
p.capImpl = (l shl 1) or 1
copyMem(addr p.data[0], unsafeAddr s[0], l)
copyMem(addr result.payload[0], unsafeAddr s[0], AlwaysAvail)
result.slen = byte(PayloadSize + 1)
result.more = p
proc toSmallString*(s: string): SmallString =
toSmallString(s.toOpenArray(0, s.high))
proc `$`*(s: SmallString): string =
let (l, p) = s.guts
result = newString(l)
if l > 0:
copyMem(unsafeAddr result[0], p, l)
when isMainModule:
var s = toSmallString("Hello, world!")
s.add toSmallString("more")
echo s
It's complete enough for you to tinker with but the real value would be in patching Nim 2 (or Nimony) to use this implementation for ARC/ORC.
Properties:
I just feed your post and I got this: https://github.com/mantielero/sso_challenge
I am not sure if this is useful.
The AI said:
Key Features Implemented
- 16-byte size - fits in two CPU registers
- Short strings (≤15 bytes) - stored inline, zero heap allocations
- Long strings - copy-on-write with atomic reference counting
- 7-byte inline prefix - fast comparison without memory fetches
- Full API: creation, mutation, comparison, indexing, iteration
Test Results
✅ All 30+ unit tests pass covering:
- Basics, mutation, comparison, substring ops
- COW semantics, iteration, utilities, edge cases
Benchmark Highlights
┌──────────────┬────────────────────────────────────┐
│ Operation │ SmallString Performance │
├──────────────┼────────────────────────────────────┤
│ Short concat │ 16 ns/iter (2.7x faster than std) │
│ COW+modify │ 50 ns/iter (competitive) │
│ Long cmp │ 0.0006 ns/iter (fast prefix check) │
└──────────────┴────────────────────────────────────┘
Learn how to read. Let me quote myself:
"Long strings use copy-on-write with atomics, these should be rare enough that the sharing of memory pays off and the atomics don't matter. This needs benchmarking though, maybe it's overkill and a pessimization."