import re;
var
tokens: seq[string]
timeLine = re"(..):(.*)"
if "00:00:36,443".match(timeLine, tokens):
echo ">>> ", tokens[0]
which results in:
Traceback (most recent call last)
sample.nim(8) sample
SIGSEGV: Illegal storage access. (Try to compile with -d:useSysAssert -d:useGcAssert for details.)
but IMHO it shouldn't. If match() returns true, then tokens shouldn't be empty.
@Araq
Thank you for the info, this works.
However, since match() accepts sequences, wouldn't it make more sense for match() (and the other similar functions) to take care of expanding the sequence? At a minimum, I would expect the compiler to print a warning message.
Pattern matching seems to be buggy:
1) Ignoring characters
import re;
var tokens: array[8, string]
let timeLine = re"(\d\d):(\d\d):(\d\d),(\d\d+) (.*)"
if "00:00:03,009 --> 00:00:08,009".match(timeLine, tokens):
echo "[", tokens[0], "] [", tokens[1], "] [", tokens[2], "] [", tokens[3], "] [", tokens[4], "]"
prints "[00] [00] [03] [009] [ --> 00:00:08,009]"; note the space before the arrow in the last group, which should have been consumed by the literal space in the pattern rather than captured.
2) Not matching at all
import re;
var tokens: array[8, string]
let timeLine = re"(\d\d):(\d\d):(\d\d),(\d\d+) --> (\d\d):(\d\d):(\d\d),(\d\d+)"
if "00:00:03,009 --> 00:00:08,009".match(timeLine, tokens):
echo "[", tokens[0], "] [", tokens[1], "] [", tokens[2], "] [", tokens[3], "] [", tokens[4], "] [", tokens[5], "] [", tokens[6], "] [", tokens[7], "]"
prints nothing
where the corresponding Perl script works as intended:
@tokens = "00:00:03,009 --> 00:00:08,009" =~ /(\d\d):(\d\d):(\d\d),(\d\d+) --> (\d\d):(\d\d):(\d\d),(\d\d+)/;
print "[", $tokens[0], "] [", $tokens[1], "] [", $tokens[2], "] [", $tokens[3], "] [", $tokens[4], "] [", $tokens[5], "] [", $tokens[6], "] [", $tokens[7], "]";
which prints: "[00] [00] [03] [009] [00] [00] [08] [009]"
proc re*(s: string, flags = {reExtended, reStudy}): Regex
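The default is extended regex syntax (reExtended), so whitespace in the pattern is ignored and can be used to make the regexes more readable; a literal space has to be written explicitly (e.g. as \s or an escaped space). But yes, this should be in big FAT letters in the docs. PRs are welcome, as usual.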
@Araq
Again, thanks for the info.
Where would I file a PR?
@BlaXpirit
I wouldn't mind using NRE, but for now I'd like to stick with "The Standard". Maybe RE will be NRE in the future?
var
tokens: array[2, string] # seq[string] would be nicer...
tests = ["-n345", "-n", "--test345"]
for test in tests:
tokens = ["", ""] # OMITTING THIS GIVES US "WRONG" RESULTS
if test.match(re"-([[:alpha:]])\s?(.+)?", tokens):
echo tokens[0], ", ", tokens[1]
var matchesidx = newseq[tuple[first: int, last: int]](5)
var spos = findbounds(fstr,regpat,matches=matchesidx,0)