Is there a way to define recursive iterators? I'd need to unpack an arbitrarily nested sequence of objects and I thought using an iterator would be the most efficient solution. I tried this, but I get is expression 'flatten' cannot be called at flatten(T, source[0])
iterator flatten[T](source: seq[T]): T =
## Flattens an arbitrarily nested
## sequence
if T isnot seq:
for element in source:
yield element
else:
flatten(T, source[0])
iterator flatten(source: auto): auto =
for element in flatten[typeof stripGeneric(source)](source):
yield element
For context, stripGeneric is defined as follows:
proc stripGeneric[T](s: typedesc[seq[T]]): auto =
## Recursively calls itself and unnests the
## sequence until the base dataTypee is found.
## This is done at compile-time since it is
## needed for generics to work properly
when T is seq:
var tmp: typeof stripGeneric(T)
return tmp
else:
var tmp: T
return tmp
proc stripGeneric[T](s: seq[T]): auto =
## Helper to recursively strip an arbitrarily nested sequence
## and get the data type of the tensor
var tmp: typeof stripGeneric(typeof s)
return tmp
I need this because I'm implementing a small tensor library (yes, I know arraymancer exists, I wanna roll my own version for learning purposes) and I have two newTensor procedures currently defined like this (kudos to @haxscramper on Telegram for helping with this and with stripGeneric as well):
proc newTensor[T: seq](shape: seq[int], input: seq[T]): auto =
## Initializes a new 1-dimensional tensor (base case)
assert len(shape) == 1, &"invalid shape length ({shape.len()}) for tensor"
assert len(input) == shape[0], &"invalid input length ({input.len()}) for tensor of shape {shape}"
result.container = CPUStorage[T](data: input)
result.shape = shape
result.rank = 1
proc newTensor[T: seq](shape: seq[int], input: seq[T]): auto =
# Initializes a new N-dimensional tensor with the given shape
var res: Tensor[typeof stripGeneric(input)]
res.shape = shape
res.rank = len(res.shape)
res.container.data = newSeqOfCap[typeof stripGeneric(input)](shape.foldl(a * b))
assert len(input) == shape[0], &"invalid input length ({input.len()}) for tensor of shape {shape}"
for sub in input:
# FIXME inefficient as it involves copying for all subtensors. More
# optimized would probably involve passing `var container: seq[T]`
# buffer (from toplevel tensor) and appending elements to it (using
# additional heuristics to guess optimal initial size would also speed
# things up as you won't have to reallocate)
res.container.data &= newTensor(shape[1..^1], sub).container.data
return res
As the FIXME comment states, this is tremendously inefficient as it copies each and every time a subtensor is unpacked, which is not exactly CPU cache-friendly
Well, if I reduce your example to:
iterator flatten[T](source: seq[T]): auto =
## Flattens an arbitrarily nested
## sequence
when not (T is seq):
for element in source:
yield element
else:
for e in flatten(source):
yield e
for i in flatten(@[@[1,2,3],@[4,5],@[6]]):
echo i
We get an error message that might answer your question:
/usercode/in.nim(33, 17) template/generic instantiation of `flatten` from here
/usercode/in.nim(26, 25) Error: recursion is not supported in iterators: 'flatten'
I'm not sure if this is what you're looking for, but I came up with this:
import macros
macro genIteratorBody(typ: typedesc, source: typed): untyped =
var
seqType = getType(typ)[1]
curIterItem = source
curIterBody = newStmtList()
result = curIterBody
while seqType.kind == nnkBracketExpr and $seqType[0] == "seq":
let newLoop = nnkForStmt.newTree(nskForVar.genSym, nnkCall.newTree(bindSym"items", curIterItem), newStmtList())
curIterBody.add(newLoop)
curIterItem = newLoop[0]
curIterBody = newLoop[2]
seqType = seqType[1]
curIterBody.add nnkYieldStmt.newTree(curIterItem)
iterator flatten[T](source: T): auto =
## Flattens an arbitrarily nested
## sequence
genIteratorBody(T, source)
echo "first list:"
for item in flatten(@[@[0, 0, 0], @[1, 2, 3], @[2]]):
echo item
echo "second list:"
for item in flatten(@[@[@[1, 2, 4], @[0], @[3, 5, 6]], @[@[1, 2, 5], @[12], @[3]], @[@[0]]]):
echo item
template flattenTmpl(sq: seq or array, body: untyped): untyped =
when sq[0] is (seq or array):
for i in sq:
flattenTmpl(i, body)
else:
for i {.inject.} in sq:
body
iterator flatten(sq: seq or array): auto =
flattenTmpl(sq):
yield i
echo "first"
var a: seq[int]
for i in flatten(a):
echo i
echo "second"
for i in flatten(@[@[0, 1, 2], @[3, 4], @[5], @[], @[6, 7]]):
echo i
echo "third"
for i in flatten(@[@[@[1, 2, 3], @[4], @[5, 6, 7, 8]], @[], @[@[9, 10], @[]], @[@[11, 12], @[11], @[12]]]):
echo i
echo "array"
for i in flatten([1, 2, 3]):
echo i
echo "array and seq"
for i in flatten([@[1], @[2], @[3, 4]]):
echo i
Sooo, modifying @shirleyquirk's solution, this seems to work:
iterator flatten[T](source: openArray[T]): auto =
## Flattens an arbitrarily nested
## sequence
when T isnot seq:
for element in source:
yield element
else:
for each in source:
for e in flatten(each):
yield e
echo "first"
var a: seq[int]
for i in flatten(a):
echo i
echo "second"
for i in flatten(@[@[0, 1, 2], @[3, 4], @[5], @[], @[6, 7]]):
echo i
echo "third"
for i in flatten(@[@[@[1, 2, 3], @[4], @[5, 6, 7, 8]], @[], @[@[9, 10], @[]], @[@[11, 12], @[11], @[12]]]):
echo i
echo "array"
for i in flatten([1, 2, 3]):
echo i
echo "array and seq"
for i in flatten([@[1], @[2], @[3, 4]]):
echo i
Nice! I didn't know overloaded iterator can be used recursively.
When a depth of loop depends on runtime value, compiler cannot know how many loops need to be nested and cannot generate loops. But Nim can expand overloaded recursive inline iterator because a depth of loop can be determined at compile time.
This code works:
iterator test(a: int or string): int =
when a is int:
for i in test($a):
yield i
else:
for i in a:
#for i in test(a.len):
yield i.int
echo "begin iteration"
for i in test(123):
echo i
But following infinite overloaded recursive inline interator compiles without compile error or freezing compiler:
iterator test(a: int or string): int =
when a is int:
for i in test($a):
yield i
else:
#for i in a:
for i in test(a.len):
yield i.int
echo "begin iteration"
for i in test(123):
echo i
Ok, my turn.
It's not possible to create an arbitrarily-nested seq[seq... at runtime, so first here's a thrown-together nested type
type
Nested[T] = object
case isBase:bool
of true:
val: T
of false:
vals: seq[Nested[T]]
proc `$`[T](n: Nested[T]):string = (if n.isBase: $n.val else: $n.vals)
proc initNest[T](x: Nested[T]):Nested[T] = x
proc initNest[T:not openArray](x: T):Nested[T] = Nested[T](isBase:true, val: x)
proc N[T](args: varargs[Nested[T],initNest]):Nested[T] =
if args.len == 1: args[0]
else:
Nested[T](isBase:false, vals: @args)
and the creation of a nest of run-time-specified depth
import math
proc makeNest(depth: int,counter:int):Nested[int] =
if depth == 0:
result = N(counter,counter+1,counter+2)
else:
result = N(makeNest(depth-1,counter),
makeNest(depth-1,counter + pow(3.0,depth.float).int),
makeNest(depth-1,counter + 2 * pow(3.0,depth.float).int))
now the toIter stuff copy-pasted from the manual:
import macros
macro toItr(x: ForLoopStmt): untyped =
let expr = x[0]
let call = x[1][1] # Get foo out of toItr(foo)
let body = x[2]
result = quote do:
block:
let itr = `call`
for `expr` in itr():
`body`
and our flatten
proc flatten[T](n:Nested[T]):iterator():T =
result = iterator():T =
if n.isBase: yield n.val
else:
for i in n.vals:
for n in toItr(nitems(i)):
yield n
import os,strutils
proc main() =
let depth = commandLineParams()[0].parseInt()
let x = makeNest(depth,0)
for i in toItr(flatten(x)):
echo i
main()
Good solution, @shirleyquirk.
To others/more generally, all this has been discussed before several times and shows up on Forum searches. Also, this is a more general toItr. The manual is understandably tilted toward pedagogy not generality.
While I do provide one in cligen/sysUt, I do think something like this more general toItr may be a good addition to maybe sugar with keywords in proc docs like recursive iterator factory for discoverability.
The zeitgeist does seem to have an ineliminable component of "ask questions before searching", but many still search, and we could always be a bit better. It might be helpful for @nocturn9x to explain any process he went through which failed to turn up help he wanted.