The code:
import htmlparser
import xmltree
import strtabs
import strutils
# Reproduction script: allhtml.txt holds one HTML file path per line. Each
# listed file is parsed and every <a> whose href starts with "/" is printed
# as "<list line>\t<href>".
let allhtml = open("allhtml.txt")
for line in allhtml.lines:
  echo "HTML FILE: ", line
  let doc = loadHtml(line.strip())
  for anchor in doc.findAll("a"):
    # NOTE: `anchor.attrs` is dereferenced without any guard. This lookup is
    # the `getOrDefault` frame in the reported SIGSEGV -- presumably `attrs`
    # is nil for an <a> element that carries no attributes.
    let href = anchor.attrs.getOrDefault("href", nil)
    if href.isNil:
      continue
    if href.startsWith("/"):
      echo "$1\t$2" % [line, href]
allhtml.close()
The error:
Traceback (most recent call last)
rewrite.nim(26) rewrite
strtabs.nim(133) getOrDefault
strtabs.nim(104) rawGet
strtabs.nim(85) myhash
SIGSEGV: Illegal storage access. (Attempt to read from nil?)
Nope, it did not make any difference:
import htmlparser
import xmltree
import strtabs
import strutils
# Second attempt: identical to the first script except that the href lookup
# falls back to "/" instead of nil, and a running file counter is printed in
# front of each path.
let allhtml = open("allhtml.txt")
var count = 0
for line in allhtml.lines:
  echo "HTML file: ", count, line
  let doc = loadHtml(line.strip())
  for anchor in doc.findAll("a"):
    # The "/" default only applies when the "href" key is missing;
    # `anchor.attrs` itself is still dereferenced without a guard.
    # Side effect of this default: an <a> with attributes but no href is
    # reported as a link to "/".
    let href = anchor.attrs.getOrDefault("href", "/")
    if href.isNil:
      continue
    if href.startsWith("/"):
      echo "$1\t$2" % [line, href]
  # One increment per list entry, matching the per-file echo above.
  inc count
allhtml.close()
ISSUE RESOLVED.
I fixed the error by checking the `attrsLen` property before touching `attrs`, like this:
import htmlparser
import xmltree
import strtabs
import strutils
# Working version: allhtml.txt holds one HTML file path per line. Each listed
# file is parsed and every <a> whose href starts with "/" is printed as
# "<list line>\t<href>".
let allhtml = open("allhtml.txt")
var line: string
var count = 0  # index of the list entry currently being processed
try:
  while allhtml.readLine(line):
    echo "HTML file: ", count, line
    let html = loadHtml(line.strip())
    for a in html.findAll("a"):
      # Only touch `a.attrs` when the element actually has attributes;
      # reading it on an attribute-less <a> is what crashed earlier versions.
      if a.attrsLen > 0:
        # With no explicit default, getOrDefault yields "" for a missing
        # key, so no nil check is needed: "" never starts with "/".
        let url = a.attrs.getOrDefault("href")
        if url.startsWith("/"):
          echo "$1\t$2" % [line, url]
    count += 1
finally:
  # Release the list file even if reading or parsing raises.
  close(allhtml)