I'm trying to write a simple banner grabber in order to learn Nim and compare it with other solutions. The main problem is getting httpclient.get() to actually time out after x milliseconds.
The latest attempt was as follows:
import httpclient, threadpool, os, asyncdispatch

type
  LinkCheckResult = ref object
    url: string
    state: bool

proc getUrlWithTimeoutAsync(url: string, timeout: int): LinkCheckResult {.thread.} =
  var client = newAsyncHttpClient()
  var resp: Future[AsyncResponse]
  try:
    resp = client.get(url)
  except:
    return LinkCheckResult(url: url, state: false)
  var count = 0
  while count < timeout: # timeout implementation ???
    sleep(100)
    count += 100
    if resp.finished():
      return LinkCheckResult(url: url, state: true) # this never happens. why !?
  return LinkCheckResult(url: url, state: false)

proc main(): void =
  var linkCheckResult = newSeq[FlowVar[LinkCheckResult]]()
  linkCheckResult.add(spawn getUrlWithTimeoutAsync("http://192.168.0.10", 2000))
  sync()
  for x in linkCheckResult:
    let res = ^x
    if res.state:
      echo "[+] ", res.url

main()
Any help?
# this never happens. why !?
Because you never run the async event loop.
I would recommend writing this async-only at first, and if you really need to scale across all cores, just run multiple processes.
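For example, here is a minimal async-only sketch using withTimeout from asyncdispatch (the URL and the 2000 ms timeout are taken from your code; checkUrl and the rest are just for illustration):

import httpclient, asyncdispatch

type
  LinkCheckResult = ref object
    url: string
    state: bool

proc checkUrl(url: string, timeout: int): Future[LinkCheckResult] {.async.} =
  let client = newAsyncHttpClient()
  var state = false
  try:
    # withTimeout completes with true only if the wrapped future
    # finished within `timeout` milliseconds
    state = await client.get(url).withTimeout(timeout)
  except CatchableError:
    state = false
  # note: on timeout the pending request is simply abandoned here
  client.close()
  result = LinkCheckResult(url: url, state: state)

when isMainModule:
  # waitFor drives the event loop until the future completes --
  # this is the step missing from the original code
  let res = waitFor checkUrl("http://192.168.0.10", 2000)
  if res.state:
    echo "[+] ", res.url
  else:
    echo "[ ] ", res.url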
The following code should achieve what you want. The challenge here is that the AsyncHttpClient in the stdlib does not provide a timeout feature; also, if you have a long list of links or want to reuse TCP connections, you need to manually create a client pool.
I have encountered and dealt with the same problem before. You can download http_client_pool here, which wraps AsyncHttpClient for timeouts and pooling.
The implementation probably does not squeeze out the maximum performance, but I have been using http_client_pool to stress-test a server at a rate of 400 req/s for weeks without problems. The code should be stable enough, at least for me :-)
import asyncdispatch
import sequtils
import sugar
import http_client_pool

const urls = [
  "https://google.com",
  "https://yahoo.com",
  "https://forum.nim-lang.org",
  "https://somenotexisting.com", # Name or service not known
  "https://127.0.0.1",
  "https://192.168.0.10", # timeout
]

type
  LinkCheckResult = object
    url: string
    state: bool

proc checkLink(pool: HttpClientPool, url: string): Future[LinkCheckResult] {.async.} =
  try:
    let res = await pool.request(url, timeout=2000)
    let _ = await res.body()
    result = LinkCheckResult(url: url, state: true)
  except:
    result = LinkCheckResult(url: url, state: false)

proc main() {.async.} =
  # limit at most 6 concurrent connections
  let pool = newHttpClientPool(6)
  let linkCheckResults = await all urls.map(url => pool.checkLink(url))
  echo linkCheckResults

when isMainModule:
  waitFor main()
btw, here is another example, a true multi-threaded application using threadproxy, which is designed to simplify Nim inter-thread communication; but again, it is not the squeeze-out-maximum-performance type. Also, just like others have said, async is more than enough in this scenario.
import deques
import threadproxy
import http_client_pool

let urls = [
  "https://google.com",
  "https://yahoo.com",
  "https://forum.nim-lang.org",
  "https://somenotexisting.com", # Name or service not known
  "https://127.0.0.1",
  "https://192.168.0.10", # timeout
]

type
  LinkCheckResult = object
    url: string
    state: bool

proc workerMain(proxy: ThreadProxy) {.thread.} =
  let pool = newHttpClientPool(1)

  proc process() {.async.} =
    let job = await proxy.ask("master", "job")
    if job.kind == JNull:
      # no more jobs
      proxy.stop()
    else:
      # process job
      let url = job.getStr()
      var state = false
      try:
        let res = await pool.request(url, timeout=2000)
        let _ = await res.body()
        state = true
      except:
        discard
      await proxy.send("master", "result", %*{
        "url": url,
        "state": state
      })

  # start processing channel
  asyncCheck proxy.poll()
  while proxy.isRunning:
    waitFor process()

proc main() =
  # prepare jobs
  var jobs = initDeque[string]()
  for url in urls: jobs.addLast url

  # prepare results
  var linkCheckResults: seq[LinkCheckResult]

  # create and set up MainThreadProxy
  let proxy = newMainThreadProxy("master")

  # on worker sending back a result
  proxy.onData "result":
    echo "result ", linkCheckResults.len, " ", data
    linkCheckResults.add LinkCheckResult(
      url: data["url"].getStr(),
      state: data["state"].getBool()
    )
    if linkCheckResults.len == urls.len:
      # all done
      proxy.stop()

  # on workers asking for a job
  proxy.onData "job":
    # handle thread asking for a job
    if jobs.len > 0:
      result = %jobs.popFirst
      echo "distributing ", result
    else:
      # return null if no more jobs
      result = newJNull()

  # create 4 worker threads
  for i in 1 .. 4:
    proxy.createThread("worker_" & $i, workerMain)

  # poll until proxy stops
  waitFor proxy.poll()

  # print results
  for x in linkCheckResults:
    if x.state:
      echo "[+] ", x.url
    else:
      echo "[ ] ", x.url

when isMainModule:
  main()
Thanks all, and especially thanks to @jackhftang for the examples. I will do some more investigation. It seems that we're progressing here :).
Good job all, thanks one more time.