Support PEG from Guile.

* module/mes/peg.mes: New file.
* module/mes/peg/cache.scm: New file.
* module/mes/peg/codegen.scm: New file.
* module/mes/peg/simplify-tree.scm: New file.
* module/mes/peg/string-peg.scm: New file.
* module/mes/peg/using-parsers.scm: New file.
* tests/peg.test: New file.
* GNUmakefile (TESTS): Add it.
This commit is contained in:
Jan Nieuwenhuizen 2016-12-22 20:16:44 +01:00
parent a0b18a402b
commit 33ac19d7df
9 changed files with 1004 additions and 0 deletions

View file

@ -26,6 +26,9 @@ module/mes/psyntax.pp [generated]
Optargs from Guile Optargs from Guile
module/mes/optargs.scm module/mes/optargs.scm
PEG from Guile
module/mes/peg/
Pmatch from Guile Pmatch from Guile
module/mes/pmatch.scm module/mes/pmatch.scm

View file

@ -67,6 +67,7 @@ TESTS:=\
tests/let-syntax.test\ tests/let-syntax.test\
tests/record.test\ tests/record.test\
tests/match.test\ tests/match.test\
tests/peg.test\
# #
BASE-0:=module/mes/base-0.mes BASE-0:=module/mes/base-0.mes

38
module/mes/peg.mes Normal file
View file

@ -0,0 +1,38 @@
;;; -*-scheme-*-
;;; Mes --- Maxwell Equations of Software
;;; Copyright © 2016 Jan Nieuwenhuizen <janneke@gnu.org>
;;;
;;; This file is part of Mes.
;;;
;;; Mes is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; Mes is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with Mes. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;;; peg.mes is loaded after syntax-case: psyntax. It provides PEG
;;; from Guile-2.1.
;;; Code:
(mes-use-module (mes guile))
(mes-use-module (mes pretty-print))
(mes-use-module (mes psyntax))
(mes-use-module (srfi srfi-13))
(mes-use-module (srfi srfi-9-psyntax))
(mes-use-module (mes pmatch))
(include-from-path "mes/peg/cache.scm")
(include-from-path "mes/peg/codegen.scm")
(include-from-path "mes/peg/string-peg.scm")
(include-from-path "mes/peg/using-parsers.scm")
(include-from-path "mes/peg/simplify-tree.scm")

47
module/mes/peg/cache.scm Normal file
View file

@ -0,0 +1,47 @@
;;; -*-scheme-*-
;;;; cache.scm --- cache the results of parsing
;;;;
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;;;
(define-module (ice-9 peg cache)
#:export (cg-cached-parser))
;; The results of parsing using a nonterminal are cached. Think of it like a
;; hash with no conflict resolution. Process for deciding on the cache size
;; wasn't very scientific; just ran the benchmarks and stopped a little after
;; the point of diminishing returns on my box.
(define *cache-size* 512)
(define (make-cache)
(make-vector *cache-size* #f))
;; given a syntax object which is a parser function, returns syntax
;; which, if evaluated, will become a parser function that uses a cache.
(define (cg-cached-parser parser)
#`(let ((cache (make-cache)))
(lambda (str strlen at)
(let* ((vref (vector-ref cache (modulo at *cache-size*))))
;; Check to see whether the value is cached.
(if (and vref (eq? (car vref) str) (= (cadr vref) at))
(caddr vref);; If it is return it.
(let ((fres ;; Else calculate it and cache it.
(#,parser str strlen at)))
(vector-set! cache (modulo at *cache-size*)
(list str at fres))
fres))))))

358
module/mes/peg/codegen.scm Normal file
View file

@ -0,0 +1,358 @@
;;;; codegen.scm --- code generation for composable parsers
;;;;
;;;; Copyright (C) 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;;;
(define-module (ice-9 peg codegen)
#:export (compile-peg-pattern wrap-parser-for-users add-peg-compiler!)
#:use-module (ice-9 pretty-print)
#:use-module (system base pmatch))
(define-syntax single?
(syntax-rules ()
;;"Return #t if X is a list of one element."
((_ x)
(pmatch x
((_) #t)
(else #f)))))
(define-syntax single-filter
(syntax-rules ()
;;"If EXP is a list of one element, return the element. Otherwise return EXP."
((_ exp)
(pmatch exp
((,elt) elt)
(,elts elts)))))
(define-syntax push-not-null!
(syntax-rules ()
;;"If OBJ is non-null, push it onto LST, otherwise do nothing."
((_ lst obj)
(if (not (null? obj))
(push! lst obj)))))
(define-syntax push!
(syntax-rules ()
;;"Push an object onto a list."
((_ lst obj)
(set! lst (cons obj lst)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; CODE GENERATORS
;; These functions generate scheme code for parsing PEGs.
;; Conventions:
;; accum: (all name body none)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Code we generate will have a certain return structure depending on how we're
;; accumulating (the ACCUM variable).
(define (cg-generic-ret accum name body-uneval at)
;; name, body-uneval and at are syntax
#`(let ((body #,body-uneval))
#,(cond
((and (eq? accum 'all) name)
#`(list #,at
(cond
((not (list? body)) (list '#,name body))
((null? body) '#,name)
((symbol? (car body)) (list '#,name body))
(else (cons '#,name body)))))
((eq? accum 'name)
#`(list #,at '#,name))
((eq? accum 'body)
#`(list #,at
(cond
((single? body) (car body))
(else body))))
((eq? accum 'none)
#`(list #,at '()))
(else
(begin
(pretty-print `(cg-generic-ret-error ,accum ,name ,body-uneval ,at))
(pretty-print "Defaulting to accum of none.\n")
#`(list #,at '()))))))
;; The short name makes the formatting below much easier to read.
(define cggr cg-generic-ret)
;; Generates code that matches a particular string.
;; E.g.: (cg-string syntax "abc" 'body)
(define (cg-string pat accum)
(let ((plen (string-length pat)))
#`(lambda (str len pos)
(let ((end (+ pos #,plen)))
(and (<= end len)
(string= str #,pat pos end)
#,(case accum
((all) #`(list end (list 'cg-string #,pat)))
((name) #`(list end 'cg-string))
((body) #`(list end #,pat))
((none) #`(list end '()))
(else (error "bad accum" accum))))))))
;; Generates code for matching any character.
;; E.g.: (cg-peg-any syntax 'body)
(define (cg-peg-any accum)
#`(lambda (str len pos)
(and (< pos len)
#,(case accum
((all) #`(list (1+ pos)
(list 'cg-peg-any (substring str pos (1+ pos)))))
((name) #`(list (1+ pos) 'cg-peg-any))
((body) #`(list (1+ pos) (substring str pos (1+ pos))))
((none) #`(list (1+ pos) '()))
(else (error "bad accum" accum))))))
;; Generates code for matching a range of characters between start and end.
;; E.g.: (cg-range syntax #\a #\z 'body)
(define (cg-range pat accum)
(syntax-case pat ()
((start end)
(if (not (and (char? (syntax->datum #'start))
(char? (syntax->datum #'end))))
(error "range PEG should have characters after it; instead got"
#'start #'end))
#`(lambda (str len pos)
(and (< pos len)
(let ((c (string-ref str pos)))
(and (char>=? c start)
(char<=? c end)
#,(case accum
((all) #`(list (1+ pos) (list 'cg-range (string c))))
((name) #`(list (1+ pos) 'cg-range))
((body) #`(list (1+ pos) (string c)))
((none) #`(list (1+ pos) '()))
(else (error "bad accum" accum))))))))))
;; Generate code to match a pattern and do nothing with the result
(define (cg-ignore pat accum)
(syntax-case pat ()
((inner)
(compile-peg-pattern #'inner 'none))))
(define (cg-capture pat accum)
(syntax-case pat ()
((inner)
(compile-peg-pattern #'inner 'body))))
;; Filters the accum argument to compile-peg-pattern for buildings like string
;; literals (since we don't want to tag them with their name if we're doing an
;; "all" accum).
(define (builtin-accum-filter accum)
(cond
((eq? accum 'all) 'body)
((eq? accum 'name) 'name)
((eq? accum 'body) 'body)
((eq? accum 'none) 'none)))
(define baf builtin-accum-filter)
;; Top-level function builder for AND. Reduces to a call to CG-AND-INT.
(define (cg-and clauses accum)
#`(lambda (str len pos)
(let ((body '()))
#,(cg-and-int clauses (baf accum) #'str #'len #'pos #'body))))
;; Internal function builder for AND (calls itself).
(define (cg-and-int clauses accum str strlen at body)
(syntax-case clauses ()
(()
(cggr accum 'cg-and #`(reverse #,body) at))
((first rest ...)
#`(let ((res (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at)))
(and res
;; update AT and BODY then recurse
(let ((newat (car res))
(newbody (cadr res)))
(set! #,at newat)
(push-not-null! #,body (single-filter newbody))
#,(cg-and-int #'(rest ...) accum str strlen at body)))))))
;; Top-level function builder for OR. Reduces to a call to CG-OR-INT.
(define (cg-or clauses accum)
#`(lambda (str len pos)
#,(cg-or-int clauses (baf accum) #'str #'len #'pos)))
;; Internal function builder for OR (calls itself).
(define (cg-or-int clauses accum str strlen at)
(syntax-case clauses ()
(()
#f)
((first rest ...)
#`(or (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at)
#,(cg-or-int #'(rest ...) accum str strlen at)))))
(define (cg-* args accum)
(syntax-case args ()
((pat)
#`(lambda (str strlen at)
(let ((body '()))
(let lp ((end at) (count 0))
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
str strlen end))
(new-end (if match (car match) end))
(count (if (> new-end end) (1+ count) count)))
(if (> new-end end)
(push-not-null! body (single-filter (cadr match))))
(if (and (> new-end end)
#,#t)
(lp new-end count)
(let ((success #,#t))
#,#`(and success
#,(cggr (baf accum) 'cg-body
#'(reverse body) #'new-end)))))))))))
(define (cg-+ args accum)
(syntax-case args ()
((pat)
#`(lambda (str strlen at)
(let ((body '()))
(let lp ((end at) (count 0))
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
str strlen end))
(new-end (if match (car match) end))
(count (if (> new-end end) (1+ count) count)))
(if (> new-end end)
(push-not-null! body (single-filter (cadr match))))
(if (and (> new-end end)
#,#t)
(lp new-end count)
(let ((success #,#'(>= count 1)))
#,#`(and success
#,(cggr (baf accum) 'cg-body
#'(reverse body) #'new-end)))))))))))
(define (cg-? args accum)
(syntax-case args ()
((pat)
#`(lambda (str strlen at)
(let ((body '()))
(let lp ((end at) (count 0))
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
str strlen end))
(new-end (if match (car match) end))
(count (if (> new-end end) (1+ count) count)))
(if (> new-end end)
(push-not-null! body (single-filter (cadr match))))
(if (and (> new-end end)
#,#'(< count 1))
(lp new-end count)
(let ((success #,#t))
#,#`(and success
#,(cggr (baf accum) 'cg-body
#'(reverse body) #'new-end)))))))))))
(define (cg-followed-by args accum)
(syntax-case args ()
((pat)
#`(lambda (str strlen at)
(let ((body '()))
(let lp ((end at) (count 0))
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
str strlen end))
(new-end (if match (car match) end))
(count (if (> new-end end) (1+ count) count)))
(if (> new-end end)
(push-not-null! body (single-filter (cadr match))))
(if (and (> new-end end)
#,#'(< count 1))
(lp new-end count)
(let ((success #,#'(= count 1)))
#,#`(and success
#,(cggr (baf accum) 'cg-body #''() #'at)))))))))))
(define (cg-not-followed-by args accum)
(syntax-case args ()
((pat)
#`(lambda (str strlen at)
(let ((body '()))
(let lp ((end at) (count 0))
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
str strlen end))
(new-end (if match (car match) end))
(count (if (> new-end end) (1+ count) count)))
(if (> new-end end)
(push-not-null! body (single-filter (cadr match))))
(if (and (> new-end end)
#,#'(< count 1))
(lp new-end count)
(let ((success #,#'(= count 1)))
#,#`(if success
#f
#,(cggr (baf accum) 'cg-body #''() #'at)))))))))))
;; Association list of functions to handle different expressions as PEGs
(define peg-compiler-alist '())
(define (add-peg-compiler! symbol function)
(set! peg-compiler-alist
(assq-set! peg-compiler-alist symbol function)))
(add-peg-compiler! 'range cg-range)
(add-peg-compiler! 'ignore cg-ignore)
(add-peg-compiler! 'capture cg-capture)
(add-peg-compiler! 'and cg-and)
(add-peg-compiler! 'or cg-or)
(add-peg-compiler! '* cg-*)
(add-peg-compiler! '+ cg-+)
(add-peg-compiler! '? cg-?)
(add-peg-compiler! 'followed-by cg-followed-by)
(add-peg-compiler! 'not-followed-by cg-not-followed-by)
;; Takes an arbitrary expressions and accumulation variable, then parses it.
;; E.g.: (compile-peg-pattern syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
(define (compile-peg-pattern pat accum)
(syntax-case pat (peg-any)
(peg-any
(cg-peg-any (baf accum)))
(sym (identifier? #'sym) ;; nonterminal
#'sym)
(str (string? (syntax->datum #'str)) ;; literal string
(cg-string (syntax->datum #'str) (baf accum)))
((name . args) (let* ((nm (syntax->datum #'name))
(entry (assq-ref peg-compiler-alist nm)))
(if entry
(entry #'args accum)
(error "Bad peg form" nm #'args
"Not one of" (map car peg-compiler-alist)))))))
;; Packages the results of a parser
(define (wrap-parser-for-users for-syntax parser accumsym s-syn)
#`(lambda (str strlen at)
(let ((res (#,parser str strlen at)))
;; Try to match the nonterminal.
(if res
;; If we matched, do some post-processing to figure out
;; what data to propagate upward.
(let ((at (car res))
(body (cadr res)))
#,(cond
((eq? accumsym 'name)
#`(list at '#,s-syn))
((eq? accumsym 'all)
#`(list (car res)
(cond
((not (list? body))
(list '#,s-syn body))
((null? body) '#,s-syn)
((symbol? (car body))
(list '#,s-syn body))
(else (cons '#,s-syn body)))))
((eq? accumsym 'none) #`(list (car res) '()))
(else #`(begin res))))
;; If we didn't match, just return false.
#f))))

View file

@ -0,0 +1,97 @@
;;;; simplify-tree.scm --- utility functions for the PEG parser
;;;;
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;;;
(define-module (ice-9 peg simplify-tree)
#:export (keyword-flatten context-flatten string-collapse)
#:use-module (system base pmatch))
(define-syntax single?
(syntax-rules ()
;;"Return #t if X is a list of one element."
((_ x)
(pmatch x
((_) #t)
(else #f)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; POST-PROCESSING FUNCTIONS (TO CANONICALIZE MATCH TREES)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Is everything in LST true?
(define (andlst lst)
(or (null? lst)
(and (car lst) (andlst (cdr lst)))))
;; Is LST a list of strings?
(define (string-list? lst)
(and (list? lst) (not (null? lst))
(andlst (map string? lst))))
;; Groups all strings that are next to each other in LST. Used in
;; STRING-COLLAPSE.
(define (string-group lst)
(if (not (list? lst))
lst
(if (null? lst)
'()
(let ((next (string-group (cdr lst))))
(if (not (string? (car lst)))
(cons (car lst) next)
(if (and (not (null? next))
(list? (car next))
(string? (caar next)))
(cons (cons (car lst) (car next)) (cdr next))
(cons (list (car lst)) next)))))))
;; Collapses all the string in LST.
;; ("a" "b" (c d) "e" "f") -> ("ab" (c d) "ef")
(define (string-collapse lst)
(if (list? lst)
(let ((res (map (lambda (x) (if (string-list? x)
(apply string-append x)
x))
(string-group (map string-collapse lst)))))
(if (single? res) (car res) res))
lst))
;; If LST is an atom, return (list LST), else return LST.
(define (mklst lst)
(if (not (list? lst)) (list lst) lst))
;; Takes a list and "flattens" it, using the predicate TST to know when to stop
;; instead of terminating on atoms (see tutorial).
(define (context-flatten tst lst)
(if (or (not (list? lst)) (null? lst))
lst
(if (tst lst)
(list lst)
(apply append
(map (lambda (x) (mklst (context-flatten tst x)))
lst)))))
;; Takes a list and "flattens" it, using the list of keywords KEYWORD-LST to
;; know when to stop at (see tutorial).
(define (keyword-flatten keyword-lst lst)
(context-flatten
(lambda (x)
(if (or (not (list? x)) (null? x))
#t
(member (car x) keyword-lst)))
lst))

View file

@ -0,0 +1,273 @@
;;;; string-peg.scm --- representing PEG grammars as strings
;;;;
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;;;
(define-module (ice-9 peg string-peg)
#:export (peg-as-peg
define-peg-string-patterns
peg-grammar)
#:use-module (ice-9 peg using-parsers)
#:use-module (ice-9 peg codegen)
#:use-module (ice-9 peg simplify-tree))
;; Gets the left-hand depth of a list.
(define (depth lst)
(if (or (not (list? lst)) (null? lst))
0
(+ 1 (depth (car lst)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; Parse string PEGs using sexp PEGs.
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Grammar for PEGs in PEG grammar.
(define peg-as-peg
"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
pattern <-- alternative (SLASH sp alternative)*
alternative <-- ([!&]? sp suffix)+
suffix <-- primary ([*+?] sp)*
primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
literal <-- ['] (!['] .)* ['] sp
charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
CCrange <-- . '-' .
CCsingle <-- .
nonterminal <-- [a-zA-Z0-9-]+ sp
sp < [ \t\n]*
SLASH < '/'
LB < '['
RB < ']'
")
(define-syntax define-sexp-parser
(lambda (x)
(syntax-case x ()
((_ sym accum pat)
(let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
(accumsym (syntax->datum #'accum))
(syn (wrap-parser-for-users x matchf accumsym #'sym)))
#`(define sym #,syn))))))
(define-sexp-parser peg-grammar all
(+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern)))
(define-sexp-parser peg-pattern all
(and peg-alternative
(* (and (ignore "/") peg-sp peg-alternative))))
(define-sexp-parser peg-alternative all
(+ (and (? (or "!" "&")) peg-sp peg-suffix)))
(define-sexp-parser peg-suffix all
(and peg-primary (* (and (or "*" "+" "?") peg-sp))))
(define-sexp-parser peg-primary all
(or (and "(" peg-sp peg-pattern ")" peg-sp)
(and "." peg-sp)
peg-literal
peg-charclass
(and peg-nonterminal (not-followed-by "<"))))
(define-sexp-parser peg-literal all
(and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp))
(define-sexp-parser peg-charclass all
(and (ignore "[")
(* (and (not-followed-by "]")
(or charclass-range charclass-single)))
(ignore "]")
peg-sp))
(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
(define-sexp-parser charclass-single all peg-any)
(define-sexp-parser peg-nonterminal all
(and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp))
(define-sexp-parser peg-sp none
(* (or " " "\t" "\n")))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; PARSE STRING PEGS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Takes a string representing a PEG grammar and returns syntax that
;; will define all of the nonterminals in the grammar with equivalent
;; PEG s-expressions.
(define (peg-parser str for-syntax)
(let ((parsed (match-pattern peg-grammar str)))
(if (not parsed)
(begin
;; (display "Invalid PEG grammar!\n")
#f)
(let ((lst (peg:tree parsed)))
(cond
((or (not (list? lst)) (null? lst))
lst)
((eq? (car lst) 'peg-grammar)
#`(begin
#,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
(context-flatten (lambda (lst) (<= (depth lst) 2))
(cdr lst))))))))))
;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
;; defines all the appropriate nonterminals.
(define-syntax define-peg-string-patterns
(lambda (x)
(syntax-case x ()
((_ str)
(peg-parser (syntax->datum #'str) x)))))
;; lst has format (nonterm grabber pattern), where
;; nonterm is a symbol (the name of the nonterminal),
;; grabber is a string (either "<", "<-" or "<--"), and
;; pattern is the parse of a PEG pattern expressed as as string.
(define (peg-nonterm->defn lst for-syntax)
(let* ((nonterm (car lst))
(grabber (cadr lst))
(pattern (caddr lst))
(nonterm-name (datum->syntax for-syntax
(string->symbol (cadr nonterm)))))
#`(define-peg-pattern #,nonterm-name
#,(cond
((string=? grabber "<--") (datum->syntax for-syntax 'all))
((string=? grabber "<-") (datum->syntax for-syntax 'body))
(else (datum->syntax for-syntax 'none)))
#,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
;; lst has format ('peg-pattern ...).
;; After the context-flatten, (cdr lst) has format
;; (('peg-alternative ...) ...), where the outer list is a collection
;; of elements from a '/' alternative.
(define (peg-pattern->defn lst for-syntax)
#`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
(context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
(cdr lst)))))
;; lst has format ('peg-alternative ...).
;; After the context-flatten, (cdr lst) has the format
;; (item ...), where each item has format either ("!" ...), ("&" ...),
;; or ('peg-suffix ...).
(define (peg-alternative->defn lst for-syntax)
#`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
(context-flatten (lambda (x) (or (string? (car x))
(eq? (car x) 'peg-suffix)))
(cdr lst)))))
;; lst has the format either
;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or
;; ('peg-suffix ...).
(define (peg-body->defn lst for-syntax)
(cond
((equal? (car lst) "&")
#`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
((equal? (car lst) "!")
#`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
((eq? (car lst) 'peg-suffix)
(peg-suffix->defn lst for-syntax))
(else `(peg-parse-body-fail ,lst))))
;; lst has format ('peg-suffix <peg-primary> (? (/ "*" "?" "+")))
(define (peg-suffix->defn lst for-syntax)
(let ((inner-defn (peg-primary->defn (cadr lst) for-syntax)))
(cond
((null? (cddr lst))
inner-defn)
((equal? (caddr lst) "*")
#`(* #,inner-defn))
((equal? (caddr lst) "?")
#`(? #,inner-defn))
((equal? (caddr lst) "+")
#`(+ #,inner-defn)))))
;; Parse a primary.
(define (peg-primary->defn lst for-syntax)
(let ((el (cadr lst)))
(cond
((list? el)
(cond
((eq? (car el) 'peg-literal)
(peg-literal->defn el for-syntax))
((eq? (car el) 'peg-charclass)
(peg-charclass->defn el for-syntax))
((eq? (car el) 'peg-nonterminal)
(datum->syntax for-syntax (string->symbol (cadr el))))))
((string? el)
(cond
((equal? el "(")
(peg-pattern->defn (caddr lst) for-syntax))
((equal? el ".")
(datum->syntax for-syntax 'peg-any))
(else (datum->syntax for-syntax
`(peg-parse-any unknown-string ,lst)))))
(else (datum->syntax for-syntax
`(peg-parse-any unknown-el ,lst))))))
;; Trims characters off the front and end of STR.
;; (trim-1chars "'ab'") -> "ab"
(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
;; Parses a literal.
(define (peg-literal->defn lst for-syntax)
(datum->syntax for-syntax (trim-1chars (cadr lst))))
;; Parses a charclass.
(define (peg-charclass->defn lst for-syntax)
#`(or
#,@(map
(lambda (cc)
(cond
((eq? (car cc) 'charclass-range)
#`(range #,(datum->syntax
for-syntax
(string-ref (cadr cc) 0))
#,(datum->syntax
for-syntax
(string-ref (cadr cc) 2))))
((eq? (car cc) 'charclass-single)
(datum->syntax for-syntax (cadr cc)))))
(context-flatten
(lambda (x) (or (eq? (car x) 'charclass-range)
(eq? (car x) 'charclass-single)))
(cdr lst)))))
;; Compresses a list to save the optimizer work.
;; e.g. (or (and a)) -> a
(define (compressor-core lst)
(if (or (not (list? lst)) (null? lst))
lst
(cond
((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
(null? (cddr lst)))
(compressor-core (cadr lst)))
((and (eq? (car lst) 'body)
(eq? (cadr lst) 'lit)
(eq? (cadddr lst) 1))
(compressor-core (caddr lst)))
(else (map compressor-core lst)))))
(define (compressor syn for-syntax)
(datum->syntax for-syntax
(compressor-core (syntax->datum syn))))
;; Builds a lambda-expressions for the pattern STR using accum.
(define (peg-string-compile args accum)
(syntax-case args ()
((str-stx) (string? (syntax->datum #'str-stx))
(let ((string (syntax->datum #'str-stx)))
(compile-peg-pattern
(compressor
(peg-pattern->defn
(peg:tree (match-pattern peg-pattern string)) #'str-stx)
#'str-stx)
(if (eq? accum 'all) 'body accum))))
(else (error "Bad embedded PEG string" args))))
(add-peg-compiler! 'peg peg-string-compile)

View file

@ -0,0 +1,115 @@
;;;; using-parsers.scm --- utilities to make using parsers easier
;;;;
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;;;
(define-module (ice-9 peg using-parsers)
#:use-module (ice-9 peg simplify-tree)
#:use-module (ice-9 peg codegen)
#:use-module (ice-9 peg cache)
#:export (match-pattern define-peg-pattern search-for-pattern
prec make-prec peg:start peg:end peg:string
peg:tree peg:substring peg-record?))
;;;
;;; Helper Macros
;;;
(define-syntax until
(syntax-rules ()
;;"Evaluate TEST. If it is true, return its value. Otherwise,execute the STMTs and try again."
((_ test stmt stmt* ...)
(let lp ()
(or test
(begin stmt stmt* ... (lp)))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; FOR DEFINING AND USING NONTERMINALS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Parses STRING using NONTERM
(define (match-pattern nonterm string)
;; We copy the string before using it because it might have been modified
;; in-place since the last time it was parsed, which would invalidate the
;; cache. Guile uses copy-on-write for strings, so this is fast.
(let ((res (nonterm (string-copy string) (string-length string) 0)))
(if (not res)
#f
(make-prec 0 (car res) string (string-collapse (cadr res))))))
;; Defines a new nonterminal symbol accumulating with ACCUM.
(define-syntax define-peg-pattern
(lambda (x)
(syntax-case x ()
((_ sym accum pat)
(let ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
(accumsym (syntax->datum #'accum)))
;; CODE is the code to parse the string if the result isn't cached.
(let ((syn (wrap-parser-for-users x matchf accumsym #'sym)))
#`(define sym #,(cg-cached-parser syn))))))))
(define (peg-like->peg pat)
(syntax-case pat ()
(str (string? (syntax->datum #'str)) #'(peg str))
(else pat)))
;; Searches through STRING for something that parses to PEG-MATCHER. Think
;; regexp search.
(define-syntax search-for-pattern
(lambda (x)
(syntax-case x ()
((_ pattern string-uncopied)
(let ((pmsym (syntax->datum #'pattern)))
(let ((matcher (compile-peg-pattern (peg-like->peg #'pattern) 'body)))
;; We copy the string before using it because it might have been
;; modified in-place since the last time it was parsed, which would
;; invalidate the cache. Guile uses copy-on-write for strings, so
;; this is fast.
#`(let ((string (string-copy string-uncopied))
(strlen (string-length string-uncopied))
(at 0))
(let ((ret (until (or (>= at strlen)
(#,matcher string strlen at))
(set! at (+ at 1)))))
(if (eq? ret #t) ;; (>= at strlen) succeeded
#f
(let ((end (car ret))
(match (cadr ret)))
(make-prec
at end string
(string-collapse match))))))))))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;; PMATCH STRUCTURE MUNGING
;; Pretty self-explanatory.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define prec
(make-record-type "peg" '(start end string tree)))
(define make-prec
(record-constructor prec '(start end string tree)))
(define (peg:start pm)
(if pm ((record-accessor prec 'start) pm) #f))
(define (peg:end pm)
(if pm ((record-accessor prec 'end) pm) #f))
(define (peg:string pm)
(if pm ((record-accessor prec 'string) pm) #f))
(define (peg:tree pm)
(if pm ((record-accessor prec 'tree) pm) #f))
(define (peg:substring pm)
(if pm (substring (peg:string pm) (peg:start pm) (peg:end pm)) #f))
(define peg-record? (record-predicate prec))

72
tests/peg.test Executable file
View file

@ -0,0 +1,72 @@
#! /bin/sh
# -*-scheme-*-
MES_ARENA=${MES_ARENA-10000000}
export MES_ARENA
echo ' ()' | cat $(dirname $0)/../module/mes/base-0.mes $0 /dev/stdin | $(dirname $0)/../scripts/mes $MES_FLAGS $MES_FLAGS"$@"
#paredit:||
exit $?
!#
;;; -*-scheme-*-
;;; Mes --- Maxwell Equations of Software
;;; Copyright © 2016 Jan Nieuwenhuizen <janneke@gnu.org>
;;;
;;; This file is part of Mes.
;;;
;;; Mes is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; Mes is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with Mes. If not, see <http://www.gnu.org/licenses/>.
(cond-expand
(guile-2.2
(use-modules (ice-9 peg)))
(guile
(use-modules (ice-9 syncase))
(display "guile 2.0: no PEG\n" (current-error-port))
(exit 0))
(mes
(mes-use-module (mes peg))
(mes-use-module (mes test))))
(pass-if "first dummy" #t)
(pass-if-not "second dummy" #f)
(define *etc-passwd*
"root:x:0:0:root:/root:/bin/bash
daemon:x:1:1:daemon:/usr/sbin:/bin/sh
bin:x:2:2:bin:/bin:/bin/sh
sys:x:3:3:sys:/dev:/bin/sh
nobody:x:65534:65534:nobody:/nonexistent:/bin/sh
messagebus:x:103:107::/var/run/dbus:/bin/false")
(define-peg-string-patterns
"string-passwd <- entry* !.
entry <-- (! NL .)* NL*
NL < '\n'")
(pass-if-equal "peg-tree"
(map (lambda (x) (list 'entry x)) (string-split *etc-passwd* #\newline))
(peg:tree (match-pattern string-passwd *etc-passwd*)))
(define-peg-pattern passwd body (and (* entry) (not-followed-by peg-any)))
(define-peg-pattern entry all (and (* (and (not-followed-by NL) peg-any))
(* NL)))
(define-peg-pattern NL none "\n")
(define-peg-pattern passwd body (peg "entry* !."))
(pass-if-equal "peg-tree"
(map (lambda (x) (list 'entry x)) (string-split *etc-passwd* #\newline))
(peg:tree (match-pattern passwd *etc-passwd*)))
(result 'report)