;; hello java (define-record pushback-input-port port buf pos) (define (pb:wrap-input-port port #!optional (buffer-size 1)) (when (<= buffer-size 0) (error "Buffer size must be a positive number" buffer-size)) (make-pushback-input-port port (make-vector buffer-size #f) buffer-size)) (define (pb:peek-char port) (let ((buf (pushback-input-port-buf port)) (pos (pushback-input-port-pos port)) (%port (pushback-input-port-port port))) (if (< pos (vector-length buf)) (vector-ref buf pos) (peek-char %port)))) (define (pb:read-char port) (let ((buf (pushback-input-port-buf port)) (pos (pushback-input-port-pos port)) (%port (pushback-input-port-port port))) (if (< pos (vector-length buf)) (let ((char (vector-ref buf pos))) (pushback-input-port-pos-set! port (add1 pos)) char) (read-char %port)))) (define (pb:pushback-char char port) (let ((buf (pushback-input-port-buf port)) (pos (pushback-input-port-pos port))) (when (zero? pos) (error "Pushback buffer is full")) (pushback-input-port-pos-set! port (sub1 pos)) (vector-set! buf (sub1 pos) char))) ;; hello emacs (define (looking-at? str in) (define (unread-chars chars) (for-each (lambda (char) (pb:pushback-char char in)) chars)) (let loop ((chars (string->list str)) (read-chars '())) (if (null? chars) (begin (unread-chars read-chars) str) (let ((char (pb:read-char in))) (if (eqv? char (car chars)) (loop (cdr chars) (cons char read-chars)) (begin (unread-chars (cons char read-chars)) #f)))))) (define (consume! n in) (let loop ((i 0)) (when (< i n) (pb:read-char in) (loop (add1 i))))) ;; consume if we are looking at str (define (looking-at! str in) (let ((ret (looking-at? str in))) (when ret (consume! (string-length ret) in)) ret)) (define (looking-at-any? strs in) (let loop ((strs strs)) (if (pair? strs) (or (looking-at? (car strs) in) (loop (cdr strs))) #f))) (define (looking-at-any! strs in) (let ((ret (looking-at-any? strs in))) (when ret (consume! (string-length ret) in)) ret)) (define available-languages (make-parameter '())) (define loaded-grammars (make-parameter (make-hash-table))) (define (language->grammar language) (let ((name (car language)) (meta (cdr language)) (grammar '()) (spans '())) (and-let* ((keywords (alist-ref 'keywords meta))) (let ((delimiters (or (car (alist-ref 'delimiters meta)) " \t\r\n"))) (set! grammar (cons `(keywords (rx . ,(irregex `(or ,@keywords))) (delimiters . ,delimiters)) grammar)))) (let ((meta (remove (lambda (item) (member (car item) '(aliases delimiters keywords))) meta))) (for-each (lambda (directive) (let ((class (car directive)) (rules (cdr directive))) (case class ((raw-string-literals escapable-string-literals) (let ((delimiters (map ->string (or (alist-ref 'delimiters rules) '(#\")))) (escape (and (eqv? class 'escapable-string-literals) (car (map ->string (or (alist-ref 'escape-character rules) '(#\\))))))) (for-each (lambda (delimiter) (let ((starter (->string (if (pair? delimiter) (car delimiter) delimiter))) (ender (->string (if (pair? delimiter) (cdr delimiter) delimiter)))) (set! spans (cons `((starter . ,starter) (ender . ,ender) (escape . ,escape) (token . string)) spans)))) delimiters))) ((comment highlight highlight-removed highlight-added) (for-each (lambda (rule) (let* ((type (car rule)) (delimiters (cdr rule)) (starter (car delimiters)) (ender (if (eqv? type 'until-newline) "\n" (cadr delimiters)))) (set! spans (cons `((starter . ,starter) (ender . ,ender) (nested? . ,(eqv? type 'nested-multiline)) (token . ,class)) spans)))) rules)) (else (error "unsupported grammatical construct" class))))) meta)) ;; HACK: perform longest munch by placing the longest starters first (set! spans (sort spans (lambda (a b) (> (string-length (alist-ref 'starter a)) (string-length (alist-ref 'starter b)))))) (set! grammar (cons (cons 'spans spans) grammar)) grammar)) (define (load-languages languages) (available-languages (append languages (available-languages)))) (define (load-languages-from-path language-path) (load-languages (call-with-input-file language-path read-list))) (define (built-in-path name) (make-pathname (list (chicken-home) "minlight") name)) (define (load-built-in-languages) (load-languages-from-path (built-in-path "languages"))) (define (lookup-grammar name) (let ((grammar (hash-table-ref/default (loaded-grammars) name #f))) (or grammar (let loop ((languages (available-languages))) (if (null? languages) (error "no such language" name) (let* ((language (car languages)) (language-name (car language)) (aliases (or (alist-ref 'aliases (cdr language)) '()))) (if (or (equal? name language-name) (member name aliases)) (let ((grammar (language->grammar language))) (hash-table-set! (loaded-grammars) language-name grammar) (for-each (lambda (alias) (hash-table-set! (loaded-grammars) alias grammar)) aliases) grammar) (loop (cdr languages))))))))) (define (language-exists? name) (and (lookup-grammar name) #t)) (define (lex-chunk lexeme keyword-rx delimiters) (define (last-char str) (if (zero? (string-length str)) #f (string-ref str (sub1 (string-length str))))) (define (handle-match from-index match seed) (let* ((match-beg (irregex-match-start-index match)) (match-end (irregex-match-end-index match)) (match-string (irregex-match-substring match)) (preceding-string (if (= from-index match-beg) #f (substring lexeme from-index match-beg)))) (let ((seed (if preceding-string (cons `(chunk ,preceding-string) seed) seed))) (cond ;; non-delimiter char before keyword ((and preceding-string (not (substring-index (string (last-char preceding-string)) delimiters))) (cons `(chunk ,match-string) seed)) ;; non-delimiter char after keyword ((and (not (= match-end (string-length lexeme))) (not (substring-index (string (string-ref lexeme match-end)) delimiters))) (cons `(chunk ,match-string) seed)) (else (cons `(keyword ,match-string) seed)))))) (define (handle-finish from-index seed) (if (= from-index (string-length lexeme)) seed (cons `(chunk ,(substring lexeme from-index)) seed))) (define (flatten-tokens tokens) (foldr (lambda (item seed) (if (and (pair? seed) (eqv? (car item) 'chunk) (eqv? (car (car seed)) 'chunk)) (cons `(chunk ,(string-append (cadr (car seed)) (cadr item))) (cdr seed)) (cons item seed))) '() tokens)) (flatten-tokens (irregex-fold keyword-rx handle-match '() lexeme handle-finish))) (define (lex language in) (define max-keyword-size 64) (define grammar (lookup-grammar language)) (define keywords (alist-ref 'keywords grammar)) (define keyword-rx (and keywords (alist-ref 'rx keywords))) (define delimiters (and keywords (alist-ref 'delimiters keywords))) (define spans (alist-ref 'spans grammar)) (define span-dict (and spans (map (lambda (span) (cons (alist-ref 'starter span) span)) spans))) (define span-starters (and spans (map car span-dict))) (let ((in (pb:wrap-input-port in max-keyword-size))) (let loop ((tokens '())) (cond ((eof-object? (pb:peek-char in)) (reverse tokens)) ((and span-starters (looking-at-any! span-starters in)) => (lambda (starter) (let* ((span (alist-ref starter span-dict equal?)) (ender (alist-ref 'ender span)) (token-type (alist-ref 'token span)) (escape (alist-ref 'escape span)) (nested? (alist-ref 'nested? span))) (loop (cons (let loop ((chars (reverse (string->list starter))) (depth 1)) ; HACK (cond ((eof-object? (pb:peek-char in)) (list token-type (list->string (reverse chars)))) ((and escape (looking-at! escape in)) (let ((chars (append (reverse (string->list escape)) chars))) (if (eof-object? (pb:peek-char in)) (list token-type (list->string (reverse chars))) (loop (cons (pb:read-char in) chars) depth)))) ((and nested? (looking-at! starter in)) (let ((chars (append (reverse (string->list starter)) chars))) (loop chars (add1 depth)))) ((looking-at! ender in) (let ((chars (append (reverse (string->list ender)) chars))) (assert (>= depth 1) "invalid depth") (if (= depth 1) (list token-type (list->string (reverse chars))) (loop chars (sub1 depth))))) (else (loop (cons (pb:read-char in) chars) depth)))) tokens))))) (else (loop (append (let loop ((chars '())) (if (or (eof-object? (pb:peek-char in)) (and span-starters (looking-at-any? span-starters in))) (if (and keyword-rx delimiters) (lex-chunk (list->string (reverse chars)) keyword-rx delimiters) `((chunk ,(list->string (reverse chars))))) (loop (cons (pb:read-char in) chars)))) tokens))))))) (define (lex-string language str) (call-with-input-string str (lambda (in) (lex language in)))) (define (lex-file language path) (call-with-input-file path (lambda (in) (lex language in))))