Hi,
I regularly have to unpack binaries. I'm wondering if there is a library with convenience functions or macros to help with it.
I often see the following pattern:
type
  MyStruct = object
    ## Example binary record: six uint32 header fields, a sequence of
    ## `Object1`, two more uint32 fields and a sequence of `Object2`.
    data1: uint32
    data2: uint32
    data3: uint32
    numObjects1: uint32     ## element count used for `objects1`
    data4: uint32
    numObjects2: uint32     ## element count used for `objects2`
    objects1: seq[Object1]
    data5: uint32
    data6: uint32
    objects2: seq[Object2]
And my code to read it:
proc readMyStruct(s: Stream): MyStruct =
  ## Reads one `MyStruct` from `s`.
  ##
  ## Layout consumed, in order: the six leading uint32 fields (`data1` ..
  ## `numObjects2`), `numObjects1` raw `Object1` records, `data5`, `data6`,
  ## then `numObjects2` `Object2` records read via `readObject2`.
  ##
  ## The byte counts returned by `readData` are discarded, so a truncated
  ## stream is not detected here.

  # The six header fields are the first members of the object and all
  # uint32, so they are filled with a single raw read.
  discard s.readData(result.addr, 6 * sizeof(uint32))

  # The counts are uint32; convert explicitly where an int is required.
  let count1 = int(result.numObjects1)
  result.objects1.setLen(count1)
  if count1 > 0: # indexing [0] on an empty seq would raise
    discard s.readData(result.objects1[0].addr, sizeof(Object1) * count1)

  result.data5 = s.readUint32()
  result.data6 = s.readUint32()

  # Object2 contains sequences so it can't be copied directly from the stream
  for i in 0 ..< int(result.numObjects2):
    result.objects2.add s.readObject2()
I wish there were a function or a macro that allows me to unpack MyStruct in one call, recursively, rather than having to write my own function that has to cut-up the reading code every time I encounter an array.
Has anyone coded something like that already?
I have also used https://github.com/jangko/msgpack4nim for this purpose.
For custom objects that contain pointers or refs, you may need to specify a marshalling / unmarshalling hook.
Another one for the list, but targeted toward embedded: https://elcritch.github.io/cdecl/cdecl/bitfields.html
Also, msgpack4nim is amazingly fast. Part of the trick there is how it deserializes directly into a target object using the ‘fieldPairs’ or ‘fields’ iterator. I highly recommend using msgpack4nim if possible.
It’s also pretty straightforward to make your own generic serde proc using the ‘fields’ iterator combined with some ‘when T is X:’ checks, and then have it call itself recursively.
Thank you guys, those are some great resources.
I've reviewed them all but ended up not going with any of them because:
If I had known about them from the start of the project I think I'd have gone with one of the DSL ones. Also I have a second project where I had been writing my own packer and I'm probably going to replace it with Message Pack, I had no idea such a thing existed.
I ended up writing the following macro:
proc `*`*(a, b: seq[string]): seq[string] =
  ## Cartesian concatenation of two string lists: every element of `a`
  ## joined with every element of `b`, ordered by `a` first and `b` second.
  ## The result is empty when either input is empty.
  result = newSeqOfCap[string](a.len * b.len)
  for prefix in a:
    for suffix in b:
      result.add(prefix & suffix)
macro createReader*(t: typedesc, seqLengthIdents: varargs[untyped]): untyped =
  ## Generates an exported proc `read<T>*(s: Stream): T` for the type `t`.
  ##
  ## Fields of `t` are read in declaration order:
  ## * `seq[U]` fields call `s.read<U>()` once per element, then skip bytes
  ##   until the stream position is a multiple of 4;
  ## * builtin numeric types and `array` fields are filled with a raw
  ##   `readData` of `sizeof(field)` bytes;
  ## * any other symbol type `U` is read with `s.read<U>()`, which must
  ##   already exist at the call site.
  ##
  ## `seqLengthIdents` supplies one element-count expression per `seq` field,
  ## in declaration order. When it is omitted entirely, the count of a field
  ## `foo` is taken from `result.numfoo`.
  ##
  ## A compile-time error is reported for unsupported field types, and when
  ## fewer length expressions than `seq` fields are supplied.
  ##
  ## The generated proc is echoed at compile time as a debugging aid.
  let
    readerName = ident("read" & t.repr)
    stream = ident("s")
    res = ident("result")
    # The body starts with a no-op `discard` so the generated code stays
    # identical to what the `proc ... = discard` skeleton used to produce.
    body = newStmtList(nnkDiscardStmt.newTree(newEmptyNode()))
    # Names read with a raw `readData`. The product also yields a few names
    # that do not exist (e.g. "byte8"); those simply never match.
    rawTypeNames =
      @["byte", "int", "uint", "float"] * @["", "8", "16", "32", "64"]
    fields = getType(t)[1].getTypeImpl()[2]

  if fields.len == 0:
    # alias, e.g Vec3 = array[3, float32] .
    # !!Making some heavy assumptions about how they're used!!
    let readExpr = quote do:
      discard `stream`.readData(`res`.addr, sizeof(`res`))
    body.add readExpr

  var currentSeqIndex = 0
  for field in fields:
    let
      fieldName: NimNode = field[0]
      fieldType: NimNode = field[1]
      isBracket = fieldType.kind == nnkBracketExpr
      # Corresponds to seq[T] but **not** array[N, T]
      isSequence = isBracket and fieldType[0].repr == "seq"
      isArray = isBracket and fieldType[0].repr == "array"
    var readExpr: NimNode

    if isSequence:
      let
        itemType = fieldType[1]                    # T of seq[T]
        procName = ident("read" & itemType.repr)   # readT
      var lengthName: NimNode
      if seqLengthIdents.len == 0:
        # automatic naming: objects[] will have a count of numObjects
        lengthName = newDotExpr(res, ident("num" & fieldName.repr))
      elif currentSeqIndex < seqLengthIdents.len:
        lengthName = seqLengthIdents[currentSeqIndex]
      else:
        error("createReader: no length expression given for sequence field " &
              fieldName.repr & " of type " & t.repr, field)
      # NOTE(review): the original indentation was lost; the padding loop is
      # assumed to run once after all elements were read -- confirm.
      readExpr = quote do:
        for i in 0 ..< (`lengthName`):
          `res`.`fieldName`.add `stream`.`procName`()
        while `stream`.getPosition() mod 4 != 0:
          discard `stream`.readUint8() # discard padding
      currentSeqIndex += 1
    elif isArray or fieldType.kind == nnkSym:
      if isArray or fieldType.repr in rawTypeNames:
        # Standard types and arrays
        readExpr = quote do:
          discard `stream`.readData(`res`.`fieldName`.addr,
                                    sizeof(`res`.`fieldName`))
      else:
        # Custom type T: we call s.readT()
        let procName = ident("read" & fieldType.repr)
        readExpr = quote do:
          `res`.`fieldName` = `stream`.`procName`()
    else:
      error("createReader: unsupported field type " & fieldType.repr & " (" &
            $fieldType.kind & ") in field " & fieldName.repr & " of type " &
            t.repr, field)
    body.add readExpr

  result = quote do:
    proc `readerName`*(`stream`: Stream): `t` =
      `body`
  echo result.repr
For this updated example:
type
  Object1 = object
    ## Fixed-size record: two uint32 values followed by four bytes.
    od1: uint32
    od2: uint32
    od3: array[4, uint8]

  Object2 = object
    ## Variable-size record: a uint32 followed by a sequence of uint32.
    numData: uint32
    data: seq[uint32]

  MyStruct {.packed.} = object
    ## Example record mixing scalars, nested objects and several sequences.
    data1, data2, data3, numObjects1, data4: uint32
    # Kept separated from the other uint32s so I can see what it does to
    # the AST
    numObjects2: uint32 = 3
    objects1: seq[Object1]
    data5, data6: uint32
    data7: Object2
    objects2: seq[Object2]
    objects3: seq[Object2]
    data8: uint32
    objects4: seq[Object2]
    object5: Object1
# No length expressions are passed for Object1 and Object2, so any seq field
# `foo` takes its element count from `result.numfoo` (Object2.data uses
# result.numdata; Object1 has no seq fields).
createReader(Object1)
createReader(Object2)
# One length expression per seq field of MyStruct, consumed in declaration
# order: objects1, objects2, objects3, objects4.
createReader(MyStruct, result.numObjects1, result.numObjects2, result.numObjects2, result.numObjects2-1)
It will generate the following code:
proc readObject1*(s: Stream): Object1 =
discard
discard s.readData(result.od1.addr, sizeof(result.od1))
discard s.readData(result.od2.addr, sizeof(result.od2))
discard s.readData(result.od3.addr, sizeof(result.od3))
proc readObject2*(s: Stream): Object2 =
discard
discard s.readData(result.numData.addr, sizeof(result.numData))
for i`gensym12 in 0 ..< (result.numdata):
result.data.add s.readuint32()
while s.getPosition() mod 4 != 0:
discard s.readUint8()
proc readMyStruct*(s: Stream): MyStruct =
discard
discard s.readData(result.data1.addr, sizeof(result.data1))
discard s.readData(result.data2.addr, sizeof(result.data2))
discard s.readData(result.data3.addr, sizeof(result.data3))
discard s.readData(result.numObjects1.addr, sizeof(result.numObjects1))
discard s.readData(result.data4.addr, sizeof(result.data4))
discard s.readData(result.numObjects2.addr, sizeof(result.numObjects2))
for i`gensym21 in 0 ..< (result.numObjects1):
result.objects1.add s.readObject1()
while s.getPosition() mod 4 != 0:
discard s.readUint8()
discard s.readData(result.data5.addr, sizeof(result.data5))
discard s.readData(result.data6.addr, sizeof(result.data6))
result.data7 = s.readObject2()
for i`gensym25 in 0 ..< (result.numObjects2):
result.objects2.add s.readObject2()
while s.getPosition() mod 4 != 0:
discard s.readUint8()
for i`gensym26 in 0 ..< (result.numObjects2):
result.objects3.add s.readObject2()
while s.getPosition() mod 4 != 0:
discard s.readUint8()
discard s.readData(result.data8.addr, sizeof(result.data8))
for i`gensym28 in 0 ..< (result.numObjects2 - 1):
result.objects4.add s.readObject2()
while s.getPosition() mod 4 != 0:
discard s.readUint8()
result.object5 = s.readObject1()
But like most macros, having written it I come off feeling it wasn't really worth the added complexity.
Also, maybe you guys know how to do this, I wanted to write another macro to create multiple readers at once. Something like:
# First attempt at generating several readers in one call.
# NOTE(review): reported by the author as not working -- inside the macro
# body each element of `types` is a NimNode, not a typedesc, so it cannot be
# passed on to `createReader` like this.
macro createReaders(types: varargs[typedesc]): untyped =
result = newStmtList()
for t in types:
result.add createReader(t)
But it doesn't work because the typedescs get transformed into NimNodes. How about:
macro createReaders(types: varargs[typed]): untyped =
  ## Expands to one `createReader(T)` invocation per type listed in `types`.
  ##
  ## No length expressions are forwarded, so every generated reader relies on
  ## `createReader`'s automatic `num<Field>` naming for its seq fields.
  result = newStmtList()
  for t in types:
    # Emit the call as AST instead of invoking `createReader` directly, so
    # that it is expanded at the call site with the type symbol `t`.
    let call = quote do:
      createReader(`t`)
    result.add call
You can try to simplify your "dsl" by using a template / proc. Something like:
macro getNumFld(obj: untyped; fieldName: string): untyped =
  ## Expands to the field access `obj.num<fieldName>`, e.g.
  ## `getNumFld(result, "data")` becomes `result.numdata`.
  ##
  ## `fieldName` must be a string literal; anything else is rejected at
  ## compile time.

  # Inside a macro `fieldName` is a NimNode, so its text has to be extracted
  # before it can be concatenated with the "num" prefix.
  fieldName.expectKind({nnkStrLit, nnkRStrLit, nnkTripleStrLit})
  let id = ident("num" & fieldName.strVal)
  result = quote do:
    `obj`.`id`
proc readObject1*(s: Stream): Object2 =
for name, field in result.fieldPairs():
when typeof(field) is int32:
discard s.readData(field.addr, sizeof(result.field))
elif typeof(field) is seq[uint32]:
for i in 0 ..< (result.numdata):
result.data.add s.readuint32()
while s.getPosition() mod 4 != 0:
discard s.readUint8()
elif typeof(field) is seq: # see std/typetraits to get gen types
for i in 0 ..< getNumFld(result, name):
result.data.add s.readuint32()
while s.getPosition() mod 4 != 0:
discard s.readUint8()
...