(import scheme) (import (chicken base)) (import (chicken format)) (import (chicken io)) (import (chicken port)) (import (chicken pretty-print)) (import (chicken process-context)) (import (chicken string)) (import (srfi 1)) (import getopt-long) (import html-parser) (import ssax) (import sxml-transforms) (import sxpath) (define (die message . args) (with-output-to-port (current-error-port) (lambda () (apply print message args))) (exit 1)) (define options '((html "Interpret input as HTML instead of XML") (raw "Don't pretty-print results") (strip-namespaces "Strip XML namespaces") (help "Prints this help" (single-char #\h)))) (define usage-hint (format "Usage: ~a [options]... [file]\n" (program-name))) (define (strip-namespaces sxml) (pre-post-order* sxml `((*text* . ,(lambda (_ str) str)) (*default* . ,(lambda (tag elements) (let* ((name (symbol->string tag)) (tag (if (substring-index ":" name) (string->symbol (last (string-split name ":"))) tag))) (cons tag elements))))))) (define (process query file opts) (let* ((html? (alist-ref 'html opts)) (strip-namespaces? (alist-ref 'strip-namespaces opts)) (reader (if html? html->sxml (lambda (port) (ssax:xml->sxml port '())))) (sxml (if file (call-with-input-file file reader) (reader (current-input-port)))) (sxml (if strip-namespaces? (strip-namespaces sxml) sxml))) (if (equal? query ".") sxml ((sxpath query) sxml)))) ;; TODO: support class matching ;; NOTE: //div[contains(concat(' ',normalize-space(@class),' '),' foobar ')] (define (main) (let* ((opts (getopt-long (command-line-arguments) options)) (help? (alist-ref 'help opts)) (raw? (alist-ref 'raw opts)) (args (alist-ref '@ opts))) (when help? (display usage-hint) (print (usage options)) (exit 0)) (when (or (null? args) (> (length args) 2)) (die usage-hint (usage options))) (let* ((query (car args)) (file (if (null? (cdr args)) #f (cadr args))) (results (process query file opts))) (if raw? (if (pair? results) (for-each print results) (print results)) (pp results))))) (main)