Taking a break from working on nim-cocoa, I wanted to see how difficult it would be to pull down all of the free books from goalkicker.com.
I got it working; I'm just wondering whether there is anything I could have done differently.
import httpClient, htmlParser, xmltree, strutils, os
# Scrape the goalkicker.com index page and download the PDF linked from each
# book page into the local "books" directory.
let site = "https://books.goalkicker.com/"
var client = newHttpClient()
let src = client.getContent(site)
# File name of the book currently being processed; reset for every book page.
var fileName: string
createDir("books")
let html = src.parseHtml
try:
  for a in html.findAll("div"):
    # Only divs whose class starts with "bookContainer" are scanned for links.
    if not a.attr("class").startsWith("bookContainer"):
      continue
    for b in a.findAll("a"):
      let href = b.attr("href")
      if href.len == 0:
        # An anchor without a target would only re-fetch the index page.
        continue
      let bookUrl = site & href
      let book = client.getContent(bookUrl).parseHtml
      # Start clean so a page without a header cannot reuse the previous
      # book's name and overwrite its file.
      fileName = ""
      for c in book.findAll("div"):
        if c.attr("id") == "header":
          fileName = c.innerText.replace(" book", ".pdf")
          # Titles beginning with '.' get a "DOT" prefix so the resulting
          # file name does not start with a dot.
          if fileName.startsWith('.'): fileName = "DOT" & fileName
        if c.attr("id") == "footer":
          # The first whitespace-separated token of the footer text is used
          # as the PDF's file name on the server; splitWhitespace ignores
          # leading whitespace, which split() would turn into an empty token.
          let tokens = c.innerText.splitWhitespace()
          if tokens.len == 0 or fileName.len == 0:
            echo "Skipping $#: missing header or footer text" % [bookUrl]
            continue
          let pdfFile = bookUrl & tokens[0]
          echo "Downloading $#..." % [fileName]
          client.downloadFile(pdfFile, "books/" & fileName)
finally:
  # Release the connection even if a request or download raises.
  client.close()
AIR.
Something like this, using nimquery:
# Variant of the download loop that uses nimquery CSS selectors instead of
# nested findAll loops. Needs `import nimquery` and relies on `html`, `client`,
# `site` and `fileName` being defined as in the original script.
for b in html.querySelectorAll("div[class^='bookContainer'] > a"):
  let bookPage = client.getContent(site & b.attr("href"))
  let book = bookPage.parseHtml
  let header = book.querySelector("div#header")
  let footer = book.querySelector("div#footer")
  # NOTE(review): querySelector presumably yields nil when nothing matches —
  # confirm against the nimquery docs. Skip such pages rather than crash.
  if header.isNil or footer.isNil:
    continue
  fileName = header.innerText.replace(" book",".pdf")
  # Titles beginning with '.' get a "DOT" prefix so the resulting file name
  # does not start with a dot.
  if fileName.startsWith('.'): fileName = "DOT" & fileName
  # The first whitespace-separated token of the footer text is used as the
  # PDF's file name on the server.
  var dlFile = footer.innerText.split()[0]
  var pdfFile = site & b.attr("href") & dlFile
  echo "Downloading $#..." % [fileName]
  client.downloadFile(pdfFile, "books/" & fileName)
That's very cool, @xigoi!
Thanks for sharing, I didn't know about nimquery...