It doesn't feel like Nim code because it has pointers, can you show me the Nim way?
type
TokenKind = enum tokenNested, tokenSingle
Token = ref object
case kind: TokenKind
of tokenNested:
tokens: seq[Token]
of tokenSingle:
content: char
else: nil
proc tokenize(text: string): seq[Token] =
result = @[]
var current, latest: ptr seq[Token]
current = addr result
for it in text:
case it
of '[':
latest = current
let t = Token(kind: tokenNested, tokens: @[])
current = addr t.tokens
latest[].add(t)
of ']':
current = latest
of '>', '<', '+', '-', '.', ',':
current[].add(Token(kind: tokenSingle, content: it))
I don't know if this is Nim's style or not, but your version somewhat more efficient
type
TokenKind = enum tokenNested, tokenSingle
Token = ref object
case kind: TokenKind
of tokenNested:
tokens: seq[Token]
of tokenSingle:
content: char
else: nil
proc tokenize(text: string): seq[Token] =
result = @[]
var current, latest: ptr seq[Token]
current = addr result
for it in text:
case it
of '[':
latest = current
let t = Token(kind: tokenNested, tokens: @[])
current = addr t.tokens
latest[].add(t)
of ']':
current = latest
of '>', '<', '+', '-', '.', ',':
current[].add(Token(kind: tokenSingle, content: it))
else:
continue
import strutils
proc `$`(t: Token): string =
result = ""
case t.kind
of tokenNested:
result &= "tokens{" & t.tokens.join(" ") & "}"
of tokenSingle:
result &= $t.content
else:
discard
proc tokenize2(text: string): seq[Token] =
proc rec(txt: string): tuple[res: seq[Token], length: int] =
var walk = 0
var bufr = newSeq[Token]()
var path = txt.len
while walk < path:
var t = txt[walk]
inc walk
case t
of ']':
return (bufr, walk)
of '[':
var (nest, leap) = rec(txt[walk .. ^1])
walk += leap
bufr.add Token(kind: tokenNested, tokens: nest)
of '>', '<', '+', '-', '.', ',':
bufr.add Token(kind: tokenSingle, content: t)
else:
discard
(bufr, walk)
var (res, length) = rec(text)
result = res
echo tokenize("+-<>[++--].,.")
echo tokenize2("+-<>[++--].,.")
you should be able to just remove the ref in Token = ref object. That is one layer of indirection less, and therefore faster and less memory consumption. And a little comment on what you are doing. Tokens are not nested. What you are doing is already parsing. In brainfuck all tokens are characters, so tokenization is basically irrelevant.
And the last part is setting current and latest. That is a very dirty way of programming. As soon as programs become more complex you will no longer know what side effects your functions have. It is better if you find a way to encode current and latest as arguments and return values. You don't need any mutable state, if you return at the top level a nested Token instead of seq.
You should know that none of these recommendations are special Nim style, they are good programming style in general.
@Krux02, did you replied the TS code or my tokenize2 ?
For removing ref in ref object, I agree. I usually code in value-type instead ref-type, as I only use ref-type when it's usually wrap around costly resource, like socket, heaped memory, file i/o.
Agree too with coding by setting pointer for current and latest , that style is discouraged.