;;; extract -- download every page that transcludes Template:Mycomorphbox
;;; from the English Wikipedia API and store each raw JSON response batch
;;; as <dir>/<n>.json.
;;;
;;; Usage: extract <directory>
(module extract ()
  (import scheme)
  (import (chicken base))
  (import (chicken file))
  (import (chicken io))
  (import (chicken pathname))
  (import (chicken process-context))
  (import (srfi 1))
  (import (srfi 18))
  (import http-client)
  (import medea)
  (import uri-common)

  ;; uri-common parameter: separator character(s) for form-urlencoded
  ;; query strings.  Set so that generated query strings are joined
  ;; with `&`.
  (form-urlencoded-separator "&&")

  ;; http-client parameter: product information sent to identify this
  ;; client to the server.
  (client-software '(("Mozilla" "5.0" "compatible; +https://wald.brause.cc")))

  ;; Endpoint all requests are sent to.
  (define api-url "https://en.wikipedia.org/w/api.php")

  ;; The original request.  Continuation parameters returned by the API
  ;; are always merged into THIS alist (see `main`), never into a
  ;; previously merged query.
  (define initial-query
    '((action . "query")
      (titles . "Template:Mycomorphbox")
      (generator . "transcludedin")
      (format . "json")
      (prop . "revisions")
      (rvslots . "*")
      (rvprop . "content")))

  ;; Print the concatenation of PARTS plus a newline to the error port
  ;; and terminate the process with exit status 1.
  (define (die! . parts)
    (let ((err (current-error-port)))
      (for-each (lambda (part) (display part err)) parts)
      (newline err)
      (exit 1)))

  ;; Write STRING to the file FILENAME, replacing any previous content.
  (define (save-file! filename string)
    (call-with-output-file filename
      (lambda (out)
        (display string out))))

  ;; Return a copy of the alist QUERY in which every (key . value) pair
  ;; of the alist CONTINUE has been added, or replaces the existing
  ;; entry with the same key.
  (define (update-query query continue)
    (fold (lambda (param query)
            (alist-update (car param) (cdr param) query))
          query
          continue))

  ;; Request API-URL with the query parameters QUERY, write the raw
  ;; response body to PATH and return the response's `continue` alist,
  ;; or #f when the response carries none (i.e. the listing is done).
  ;;
  ;; Signals an error when the body does not parse to a JSON object.
  (define (fetch-json! path query)
    (let* ((uri (update-uri (uri-reference api-url) query: query))
           (body (call-with-input-request uri #f (cut read-string #f <>)))
           (json (read-json body)))
      ;; Save before inspecting, so the raw response is on disk even
      ;; when it turns out to be unusable.
      (save-file! path body)
      ;; medea represents JSON objects as alists; anything else cannot
      ;; be searched for a `continue` member.
      (unless (list? json)
        (error "API response is not a JSON object, raw body saved to" path))
      (alist-ref 'continue json)))

  ;; Fetch all result batches into the directory DIR, creating it
  ;; (including parents) when needed.  Refuses to run when DIR already
  ;; exists and is non-empty.  Batches are numbered from 0.
  (define (main dir)
    (when (and (file-exists? dir) (not (null? (directory dir))))
      (die! "Non-empty target directory: " dir))
    (create-directory dir 'parents)
    (let loop ((i 0) (query initial-query))
      (let* ((path (make-pathname dir (number->string i) ".json"))
             (continue (fetch-json! path query)))
        (print "Fetched " path)
        (when continue
          ;; Pause between consecutive requests.
          (thread-sleep! 30)
          ;; Merge the continuation into the ORIGINAL request rather
          ;; than the previous query: continuation keys from an earlier
          ;; batch that the API no longer returns must not be sent
          ;; again.
          (loop (add1 i) (update-query initial-query continue))))))

  ;; Entry point: exactly one command-line argument, the target
  ;; directory.
  (let ((args (command-line-arguments)))
    (if (= (length args) 1)
        (main (car args))
        (die! "usage: " (program-name) " <directory>")))

  )