I have a small repo here, https://github.com/ThomasTJdev/nim_nordnet_api, where I scrape stock information, convert it to JSON and send it with MQTT. It uses the nimble package q for HTML querying. With q you can:
# Build a queryable document from the page; `html` is presumably a string
# holding the HTML source (it is not defined in this snippet).
var doc = q(html)
# Search for nodes by css selector
echo doc.select("nav ul.menu li a")
If you need to log in to private pages, you can find some inspiration for handling cookies or login sessions here https://forum.nim-lang.org/t/6063 and here https://nim-lang.org/docs/httpcore.html#newHttpHeaders:
# Put the authorization token and a JSON content type on the client's headers
# (`client` and `token` are assumed to be defined by the surrounding code).
client.headers = newHttpHeaders({ "Authorization": token, "Content-Type": "application/json" })
A few days ago I tried to scrape data from a game trading site and built a small cookie-handling "nim browser" (unfortunately the site was behind Cloudflare's anti-crawling engine and behaved funny).
import httpClient, asyncdispatch, httpclient, strtabs, uri, strformat, strutils, os
type
  NimiBrowser = ref object
    ## Minimal cookie-aware "browser" state carried from request to request.
    currentUri: string           ## URL of the latest request; sent as Referer.
    cookies: StringTableRef      ## Cookie jar mapping cookie name to value.
    defaultHeaders: HttpHeaders  ## Declared but not used by the procs below.
    proxyUrl: string             ## Proxy URL; an empty string means no proxy.
proc setCookies(br: NimiBrowser, resp: AsyncResponse) =
  ## Stores every cookie announced by `resp` in the browser's cookie jar.
  ##
  ## Only the leading `name=value` pair of each `Set-Cookie` header is kept;
  ## the attributes after the first `;` (Path, Expires, ...) are ignored.
  ## A cookie without `=` is stored with an empty value.
  for key, val in resp.headers.pairs:
    if key.toLowerAscii != "set-cookie": continue
    # Everything after the first ';' is cookie attributes we do not need.
    let cookie = val.split(';', maxsplit = 1)[0]
    # Split on the first '=' only: a cookie value may itself contain '='
    # (e.g. base64 padding) and must not be truncated.
    let parts = cookie.split('=', maxsplit = 1)
    let name = parts[0].strip()
    if name.len == 0: continue  # malformed header without a cookie name
    br.cookies[name] = if parts.len == 2: parts[1].strip() else: ""
proc setCrsfTokens(br: NimiBrowser, headers: HttpHeaders): HttpHeaders =
  ## Returns `headers` with `X-XSRF-TOKEN` set to the value of the
  ## `XSRF-TOKEN` cookie, when the cookie jar holds one.
  ##
  ## `HttpHeaders` is a reference type, so the returned object is the very
  ## one that was passed in, modified in place.
  const cookieName = "XSRF-TOKEN"
  if cookieName in br.cookies:
    headers["X-XSRF-TOKEN"] = br.cookies[cookieName]
  headers
proc makeCookies(br: NimiBrowser): string =
  ## Serialises the cookie jar into a `Cookie` request-header value.
  ##
  ## Cookies with a value are rendered as `name=value`, cookies with an empty
  ## value as the bare name. Entries are joined with "; " and there is no
  ## leading or trailing separator; an empty jar yields an empty string.
  var entries: seq[string] = @[]
  for key, val in br.cookies.pairs:
    entries.add(if val.len > 0: key & "=" & val else: key)
  entries.join("; ")
proc request(br: NimiBrowser, url: string, httpMethod: HttpMethod, body = "",
             headers = newHttpHeaders()): Future[AsyncResponse] {.async.} =
  ## Sends one HTTP request on behalf of the browser and returns the response.
  ##
  ## The caller's `headers` are combined with the browser's cookies, a fixed
  ## User-Agent / Accept-Language, the previous URL as Referer and, if
  ## available, the XSRF token. After the response arrives the browser's
  ## `currentUri` and cookie jar are updated and the response headers are
  ## echoed to stdout.
  ##
  ## `br.proxyUrl`, when non-empty, is used as the HTTP proxy.

  # Work on a private copy: HttpHeaders is a ref type, so writing into
  # `headers` itself would leak the browser's headers into the caller's object.
  var vheaders = newHttpHeaders()
  if not headers.isNil:
    for key, val in headers.pairs:
      vheaders.add(key, val)

  # Do not send empty `cookie:` / `Referer:` headers on the very first request.
  let cookieLine = br.makeCookies()
  if cookieLine.len > 0:
    vheaders["cookie"] = cookieLine
  vheaders["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0"
  vheaders["Accept-Language"] = "de,en-US;q=0.7,en;q=0.3"
  if br.currentUri.len > 0:
    vheaders["Referer"] = br.currentUri
  vheaders = br.setCrsfTokens(vheaders)

  # NOTE(review): the client is never closed. Closing it right here would
  # presumably cut off the response body, which callers read afterwards --
  # confirm, and close it once the body has been consumed.
  let client =
    if br.proxyUrl != "":
      newAsyncHttpClient(headers = vheaders, proxy = newProxy(br.proxyUrl))
    else:
      newAsyncHttpClient(headers = vheaders)

  result = await client.request(url, httpMethod = httpMethod, body = body)
  br.currentUri = url
  br.setCookies(result)
  for key, val in result.headers.pairs:
    echo key, " ", val
proc get(br: NimiBrowser, url: string, body = "",
         headers = newHttpHeaders()): Future[AsyncResponse] {.async.} =
  ## Performs a GET request to `url` by delegating to `request`.
  result = await br.request(url, HttpGet, body = body, headers = headers)
proc post(br: NimiBrowser, url: string, body = "",
          headers = newHttpHeaders()): Future[AsyncResponse] {.async.} =
  ## Performs a POST request to `url` by delegating to `request`.
  result = await br.request(url, HttpPost, body = body, headers = headers)
proc newNimiBrowser(): NimiBrowser =
  ## Creates a browser with an empty cookie jar; every other field keeps its
  ## zero value (empty strings, nil `defaultHeaders`).
  new(result)
  result.cookies = newStringTable()
  # defaultHeaders is not initialised here.
# Demo: visit the trade site through a local proxy to collect cookies, then
# post a search query to its JSON API and print the answer.
const
  proxyAddress = "http://127.0.0.1:8080"
  siteUrl = "https://beta.pathofdiablo.com"
  tradePageUrl = "https://beta.pathofdiablo.com/trade-search"
  searchApiUrl = "https://beta.pathofdiablo.com/api/v2/trade/search"
  pauseMs = 1000  # pause between requests, in milliseconds

var br = newNimiBrowser()
br.proxyUrl = proxyAddress

# Load the landing page first, then the search page.
var resp = waitFor br.get(siteUrl)
sleep(pauseMs)
resp = waitFor br.get(tradePageUrl)
echo br.cookies
sleep(pauseMs)

# JSON payload describing the item search.
let body = """{"searchFilter":{"item":["Jah rune"],"need":"","quality":["All"],"gameMode":"softcore","poster":"","onlineOnly":false,"properties":[{"comparitor":"*"}]}}"""
var headers = newHttpHeaders({
  "Accept": "application/json, text/plain, */*",
  "Content-Type": "application/json;charset=utf-8"
})

resp = waitFor br.post(searchApiUrl, body = body, headers = headers)
echo waitFor resp.body
echo resp.status