Support PEG from Guile.
* module/mes/peg.mes: New file. * module/mes/peg/cache.scm: New file. * module/mes/peg/codegen.scm: New file. * module/mes/peg/simplify-tree.scm: New file. * module/mes/peg/string-peg.scm: New file. * module/mes/peg/using-parsers.scm: New file. * tests/peg.test: New file. * GNUmakefile (TESTS): Add it.
This commit is contained in:
parent
a0b18a402b
commit
33ac19d7df
3
AUTHORS
3
AUTHORS
|
@ -26,6 +26,9 @@ module/mes/psyntax.pp [generated]
|
||||||
Optargs from Guile
|
Optargs from Guile
|
||||||
module/mes/optargs.scm
|
module/mes/optargs.scm
|
||||||
|
|
||||||
|
PEG from Guile
|
||||||
|
module/mes/peg/
|
||||||
|
|
||||||
Pmatch from Guile
|
Pmatch from Guile
|
||||||
module/mes/pmatch.scm
|
module/mes/pmatch.scm
|
||||||
|
|
||||||
|
|
|
@ -67,6 +67,7 @@ TESTS:=\
|
||||||
tests/let-syntax.test\
|
tests/let-syntax.test\
|
||||||
tests/record.test\
|
tests/record.test\
|
||||||
tests/match.test\
|
tests/match.test\
|
||||||
|
tests/peg.test\
|
||||||
#
|
#
|
||||||
|
|
||||||
BASE-0:=module/mes/base-0.mes
|
BASE-0:=module/mes/base-0.mes
|
||||||
|
|
38
module/mes/peg.mes
Normal file
38
module/mes/peg.mes
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
;;; -*-scheme-*-
|
||||||
|
|
||||||
|
;;; Mes --- Maxwell Equations of Software
|
||||||
|
;;; Copyright © 2016 Jan Nieuwenhuizen <janneke@gnu.org>
|
||||||
|
;;;
|
||||||
|
;;; This file is part of Mes.
|
||||||
|
;;;
|
||||||
|
;;; Mes is free software; you can redistribute it and/or modify it
|
||||||
|
;;; under the terms of the GNU General Public License as published by
|
||||||
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
||||||
|
;;; your option) any later version.
|
||||||
|
;;;
|
||||||
|
;;; Mes is distributed in the hope that it will be useful, but
|
||||||
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
;;; GNU General Public License for more details.
|
||||||
|
;;;
|
||||||
|
;;; You should have received a copy of the GNU General Public License
|
||||||
|
;;; along with Mes. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
;;; Commentary:
|
||||||
|
|
||||||
|
;;; peg.mes is loaded after syntax-case: psyntax. It provides PEG
|
||||||
|
;;; from Guile-2.1.
|
||||||
|
|
||||||
|
;;; Code:
|
||||||
|
|
||||||
|
(mes-use-module (mes guile))
|
||||||
|
(mes-use-module (mes pretty-print))
|
||||||
|
(mes-use-module (mes psyntax))
|
||||||
|
(mes-use-module (srfi srfi-13))
|
||||||
|
(mes-use-module (srfi srfi-9-psyntax))
|
||||||
|
(mes-use-module (mes pmatch))
|
||||||
|
(include-from-path "mes/peg/cache.scm")
|
||||||
|
(include-from-path "mes/peg/codegen.scm")
|
||||||
|
(include-from-path "mes/peg/string-peg.scm")
|
||||||
|
(include-from-path "mes/peg/using-parsers.scm")
|
||||||
|
(include-from-path "mes/peg/simplify-tree.scm")
|
47
module/mes/peg/cache.scm
Normal file
47
module/mes/peg/cache.scm
Normal file
|
@ -0,0 +1,47 @@
|
||||||
|
;;; -*-scheme-*-
|
||||||
|
|
||||||
|
;;;; cache.scm --- cache the results of parsing
|
||||||
|
;;;;
|
||||||
|
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||||
|
;;;;
|
||||||
|
;;;; This library is free software; you can redistribute it and/or
|
||||||
|
;;;; modify it under the terms of the GNU Lesser General Public
|
||||||
|
;;;; License as published by the Free Software Foundation; either
|
||||||
|
;;;; version 3 of the License, or (at your option) any later version.
|
||||||
|
;;;;
|
||||||
|
;;;; This library is distributed in the hope that it will be useful,
|
||||||
|
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;;;; Lesser General Public License for more details.
|
||||||
|
;;;;
|
||||||
|
;;;; You should have received a copy of the GNU Lesser General Public
|
||||||
|
;;;; License along with this library; if not, write to the Free Software
|
||||||
|
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;;;;
|
||||||
|
|
||||||
|
(define-module (ice-9 peg cache)
|
||||||
|
#:export (cg-cached-parser))
|
||||||
|
|
||||||
|
;; The results of parsing using a nonterminal are cached. Think of it like a
|
||||||
|
;; hash with no conflict resolution. Process for deciding on the cache size
|
||||||
|
;; wasn't very scientific; just ran the benchmarks and stopped a little after
|
||||||
|
;; the point of diminishing returns on my box.
|
||||||
|
(define *cache-size* 512)
|
||||||
|
|
||||||
|
(define (make-cache)
|
||||||
|
(make-vector *cache-size* #f))
|
||||||
|
|
||||||
|
;; given a syntax object which is a parser function, returns syntax
|
||||||
|
;; which, if evaluated, will become a parser function that uses a cache.
|
||||||
|
(define (cg-cached-parser parser)
|
||||||
|
#`(let ((cache (make-cache)))
|
||||||
|
(lambda (str strlen at)
|
||||||
|
(let* ((vref (vector-ref cache (modulo at *cache-size*))))
|
||||||
|
;; Check to see whether the value is cached.
|
||||||
|
(if (and vref (eq? (car vref) str) (= (cadr vref) at))
|
||||||
|
(caddr vref);; If it is return it.
|
||||||
|
(let ((fres ;; Else calculate it and cache it.
|
||||||
|
(#,parser str strlen at)))
|
||||||
|
(vector-set! cache (modulo at *cache-size*)
|
||||||
|
(list str at fres))
|
||||||
|
fres))))))
|
358
module/mes/peg/codegen.scm
Normal file
358
module/mes/peg/codegen.scm
Normal file
|
@ -0,0 +1,358 @@
|
||||||
|
;;;; codegen.scm --- code generation for composable parsers
|
||||||
|
;;;;
|
||||||
|
;;;; Copyright (C) 2011 Free Software Foundation, Inc.
|
||||||
|
;;;;
|
||||||
|
;;;; This library is free software; you can redistribute it and/or
|
||||||
|
;;;; modify it under the terms of the GNU Lesser General Public
|
||||||
|
;;;; License as published by the Free Software Foundation; either
|
||||||
|
;;;; version 3 of the License, or (at your option) any later version.
|
||||||
|
;;;;
|
||||||
|
;;;; This library is distributed in the hope that it will be useful,
|
||||||
|
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;;;; Lesser General Public License for more details.
|
||||||
|
;;;;
|
||||||
|
;;;; You should have received a copy of the GNU Lesser General Public
|
||||||
|
;;;; License along with this library; if not, write to the Free Software
|
||||||
|
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;;;;
|
||||||
|
|
||||||
|
(define-module (ice-9 peg codegen)
|
||||||
|
#:export (compile-peg-pattern wrap-parser-for-users add-peg-compiler!)
|
||||||
|
#:use-module (ice-9 pretty-print)
|
||||||
|
#:use-module (system base pmatch))
|
||||||
|
|
||||||
|
(define-syntax single?
|
||||||
|
(syntax-rules ()
|
||||||
|
;;"Return #t if X is a list of one element."
|
||||||
|
((_ x)
|
||||||
|
(pmatch x
|
||||||
|
((_) #t)
|
||||||
|
(else #f)))))
|
||||||
|
|
||||||
|
(define-syntax single-filter
|
||||||
|
(syntax-rules ()
|
||||||
|
;;"If EXP is a list of one element, return the element. Otherwise return EXP."
|
||||||
|
((_ exp)
|
||||||
|
(pmatch exp
|
||||||
|
((,elt) elt)
|
||||||
|
(,elts elts)))))
|
||||||
|
|
||||||
|
(define-syntax push-not-null!
|
||||||
|
(syntax-rules ()
|
||||||
|
;;"If OBJ is non-null, push it onto LST, otherwise do nothing."
|
||||||
|
((_ lst obj)
|
||||||
|
(if (not (null? obj))
|
||||||
|
(push! lst obj)))))
|
||||||
|
|
||||||
|
(define-syntax push!
|
||||||
|
(syntax-rules ()
|
||||||
|
;;"Push an object onto a list."
|
||||||
|
((_ lst obj)
|
||||||
|
(set! lst (cons obj lst)))))
|
||||||
|
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;; CODE GENERATORS
|
||||||
|
;; These functions generate scheme code for parsing PEGs.
|
||||||
|
;; Conventions:
|
||||||
|
;; accum: (all name body none)
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; Code we generate will have a certain return structure depending on how we're
|
||||||
|
;; accumulating (the ACCUM variable).
|
||||||
|
(define (cg-generic-ret accum name body-uneval at)
|
||||||
|
;; name, body-uneval and at are syntax
|
||||||
|
#`(let ((body #,body-uneval))
|
||||||
|
#,(cond
|
||||||
|
((and (eq? accum 'all) name)
|
||||||
|
#`(list #,at
|
||||||
|
(cond
|
||||||
|
((not (list? body)) (list '#,name body))
|
||||||
|
((null? body) '#,name)
|
||||||
|
((symbol? (car body)) (list '#,name body))
|
||||||
|
(else (cons '#,name body)))))
|
||||||
|
((eq? accum 'name)
|
||||||
|
#`(list #,at '#,name))
|
||||||
|
((eq? accum 'body)
|
||||||
|
#`(list #,at
|
||||||
|
(cond
|
||||||
|
((single? body) (car body))
|
||||||
|
(else body))))
|
||||||
|
((eq? accum 'none)
|
||||||
|
#`(list #,at '()))
|
||||||
|
(else
|
||||||
|
(begin
|
||||||
|
(pretty-print `(cg-generic-ret-error ,accum ,name ,body-uneval ,at))
|
||||||
|
(pretty-print "Defaulting to accum of none.\n")
|
||||||
|
#`(list #,at '()))))))
|
||||||
|
|
||||||
|
;; The short name makes the formatting below much easier to read.
|
||||||
|
(define cggr cg-generic-ret)
|
||||||
|
|
||||||
|
;; Generates code that matches a particular string.
|
||||||
|
;; E.g.: (cg-string syntax "abc" 'body)
|
||||||
|
(define (cg-string pat accum)
|
||||||
|
(let ((plen (string-length pat)))
|
||||||
|
#`(lambda (str len pos)
|
||||||
|
(let ((end (+ pos #,plen)))
|
||||||
|
(and (<= end len)
|
||||||
|
(string= str #,pat pos end)
|
||||||
|
#,(case accum
|
||||||
|
((all) #`(list end (list 'cg-string #,pat)))
|
||||||
|
((name) #`(list end 'cg-string))
|
||||||
|
((body) #`(list end #,pat))
|
||||||
|
((none) #`(list end '()))
|
||||||
|
(else (error "bad accum" accum))))))))
|
||||||
|
|
||||||
|
;; Generates code for matching any character.
|
||||||
|
;; E.g.: (cg-peg-any syntax 'body)
|
||||||
|
(define (cg-peg-any accum)
|
||||||
|
#`(lambda (str len pos)
|
||||||
|
(and (< pos len)
|
||||||
|
#,(case accum
|
||||||
|
((all) #`(list (1+ pos)
|
||||||
|
(list 'cg-peg-any (substring str pos (1+ pos)))))
|
||||||
|
((name) #`(list (1+ pos) 'cg-peg-any))
|
||||||
|
((body) #`(list (1+ pos) (substring str pos (1+ pos))))
|
||||||
|
((none) #`(list (1+ pos) '()))
|
||||||
|
(else (error "bad accum" accum))))))
|
||||||
|
|
||||||
|
;; Generates code for matching a range of characters between start and end.
|
||||||
|
;; E.g.: (cg-range syntax #\a #\z 'body)
|
||||||
|
(define (cg-range pat accum)
|
||||||
|
(syntax-case pat ()
|
||||||
|
((start end)
|
||||||
|
(if (not (and (char? (syntax->datum #'start))
|
||||||
|
(char? (syntax->datum #'end))))
|
||||||
|
(error "range PEG should have characters after it; instead got"
|
||||||
|
#'start #'end))
|
||||||
|
#`(lambda (str len pos)
|
||||||
|
(and (< pos len)
|
||||||
|
(let ((c (string-ref str pos)))
|
||||||
|
(and (char>=? c start)
|
||||||
|
(char<=? c end)
|
||||||
|
#,(case accum
|
||||||
|
((all) #`(list (1+ pos) (list 'cg-range (string c))))
|
||||||
|
((name) #`(list (1+ pos) 'cg-range))
|
||||||
|
((body) #`(list (1+ pos) (string c)))
|
||||||
|
((none) #`(list (1+ pos) '()))
|
||||||
|
(else (error "bad accum" accum))))))))))
|
||||||
|
|
||||||
|
;; Generate code to match a pattern and do nothing with the result
|
||||||
|
(define (cg-ignore pat accum)
|
||||||
|
(syntax-case pat ()
|
||||||
|
((inner)
|
||||||
|
(compile-peg-pattern #'inner 'none))))
|
||||||
|
|
||||||
|
(define (cg-capture pat accum)
|
||||||
|
(syntax-case pat ()
|
||||||
|
((inner)
|
||||||
|
(compile-peg-pattern #'inner 'body))))
|
||||||
|
|
||||||
|
;; Filters the accum argument to compile-peg-pattern for buildings like string
|
||||||
|
;; literals (since we don't want to tag them with their name if we're doing an
|
||||||
|
;; "all" accum).
|
||||||
|
(define (builtin-accum-filter accum)
|
||||||
|
(cond
|
||||||
|
((eq? accum 'all) 'body)
|
||||||
|
((eq? accum 'name) 'name)
|
||||||
|
((eq? accum 'body) 'body)
|
||||||
|
((eq? accum 'none) 'none)))
|
||||||
|
(define baf builtin-accum-filter)
|
||||||
|
|
||||||
|
;; Top-level function builder for AND. Reduces to a call to CG-AND-INT.
|
||||||
|
(define (cg-and clauses accum)
|
||||||
|
#`(lambda (str len pos)
|
||||||
|
(let ((body '()))
|
||||||
|
#,(cg-and-int clauses (baf accum) #'str #'len #'pos #'body))))
|
||||||
|
|
||||||
|
;; Internal function builder for AND (calls itself).
|
||||||
|
(define (cg-and-int clauses accum str strlen at body)
|
||||||
|
(syntax-case clauses ()
|
||||||
|
(()
|
||||||
|
(cggr accum 'cg-and #`(reverse #,body) at))
|
||||||
|
((first rest ...)
|
||||||
|
#`(let ((res (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at)))
|
||||||
|
(and res
|
||||||
|
;; update AT and BODY then recurse
|
||||||
|
(let ((newat (car res))
|
||||||
|
(newbody (cadr res)))
|
||||||
|
(set! #,at newat)
|
||||||
|
(push-not-null! #,body (single-filter newbody))
|
||||||
|
#,(cg-and-int #'(rest ...) accum str strlen at body)))))))
|
||||||
|
|
||||||
|
;; Top-level function builder for OR. Reduces to a call to CG-OR-INT.
|
||||||
|
(define (cg-or clauses accum)
|
||||||
|
#`(lambda (str len pos)
|
||||||
|
#,(cg-or-int clauses (baf accum) #'str #'len #'pos)))
|
||||||
|
|
||||||
|
;; Internal function builder for OR (calls itself).
|
||||||
|
(define (cg-or-int clauses accum str strlen at)
|
||||||
|
(syntax-case clauses ()
|
||||||
|
(()
|
||||||
|
#f)
|
||||||
|
((first rest ...)
|
||||||
|
#`(or (#,(compile-peg-pattern #'first accum) #,str #,strlen #,at)
|
||||||
|
#,(cg-or-int #'(rest ...) accum str strlen at)))))
|
||||||
|
|
||||||
|
(define (cg-* args accum)
|
||||||
|
(syntax-case args ()
|
||||||
|
((pat)
|
||||||
|
#`(lambda (str strlen at)
|
||||||
|
(let ((body '()))
|
||||||
|
(let lp ((end at) (count 0))
|
||||||
|
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||||
|
str strlen end))
|
||||||
|
(new-end (if match (car match) end))
|
||||||
|
(count (if (> new-end end) (1+ count) count)))
|
||||||
|
(if (> new-end end)
|
||||||
|
(push-not-null! body (single-filter (cadr match))))
|
||||||
|
(if (and (> new-end end)
|
||||||
|
#,#t)
|
||||||
|
(lp new-end count)
|
||||||
|
(let ((success #,#t))
|
||||||
|
#,#`(and success
|
||||||
|
#,(cggr (baf accum) 'cg-body
|
||||||
|
#'(reverse body) #'new-end)))))))))))
|
||||||
|
|
||||||
|
(define (cg-+ args accum)
|
||||||
|
(syntax-case args ()
|
||||||
|
((pat)
|
||||||
|
#`(lambda (str strlen at)
|
||||||
|
(let ((body '()))
|
||||||
|
(let lp ((end at) (count 0))
|
||||||
|
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||||
|
str strlen end))
|
||||||
|
(new-end (if match (car match) end))
|
||||||
|
(count (if (> new-end end) (1+ count) count)))
|
||||||
|
(if (> new-end end)
|
||||||
|
(push-not-null! body (single-filter (cadr match))))
|
||||||
|
(if (and (> new-end end)
|
||||||
|
#,#t)
|
||||||
|
(lp new-end count)
|
||||||
|
(let ((success #,#'(>= count 1)))
|
||||||
|
#,#`(and success
|
||||||
|
#,(cggr (baf accum) 'cg-body
|
||||||
|
#'(reverse body) #'new-end)))))))))))
|
||||||
|
|
||||||
|
(define (cg-? args accum)
|
||||||
|
(syntax-case args ()
|
||||||
|
((pat)
|
||||||
|
#`(lambda (str strlen at)
|
||||||
|
(let ((body '()))
|
||||||
|
(let lp ((end at) (count 0))
|
||||||
|
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||||
|
str strlen end))
|
||||||
|
(new-end (if match (car match) end))
|
||||||
|
(count (if (> new-end end) (1+ count) count)))
|
||||||
|
(if (> new-end end)
|
||||||
|
(push-not-null! body (single-filter (cadr match))))
|
||||||
|
(if (and (> new-end end)
|
||||||
|
#,#'(< count 1))
|
||||||
|
(lp new-end count)
|
||||||
|
(let ((success #,#t))
|
||||||
|
#,#`(and success
|
||||||
|
#,(cggr (baf accum) 'cg-body
|
||||||
|
#'(reverse body) #'new-end)))))))))))
|
||||||
|
|
||||||
|
(define (cg-followed-by args accum)
|
||||||
|
(syntax-case args ()
|
||||||
|
((pat)
|
||||||
|
#`(lambda (str strlen at)
|
||||||
|
(let ((body '()))
|
||||||
|
(let lp ((end at) (count 0))
|
||||||
|
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||||
|
str strlen end))
|
||||||
|
(new-end (if match (car match) end))
|
||||||
|
(count (if (> new-end end) (1+ count) count)))
|
||||||
|
(if (> new-end end)
|
||||||
|
(push-not-null! body (single-filter (cadr match))))
|
||||||
|
(if (and (> new-end end)
|
||||||
|
#,#'(< count 1))
|
||||||
|
(lp new-end count)
|
||||||
|
(let ((success #,#'(= count 1)))
|
||||||
|
#,#`(and success
|
||||||
|
#,(cggr (baf accum) 'cg-body #''() #'at)))))))))))
|
||||||
|
|
||||||
|
(define (cg-not-followed-by args accum)
|
||||||
|
(syntax-case args ()
|
||||||
|
((pat)
|
||||||
|
#`(lambda (str strlen at)
|
||||||
|
(let ((body '()))
|
||||||
|
(let lp ((end at) (count 0))
|
||||||
|
(let* ((match (#,(compile-peg-pattern #'pat (baf accum))
|
||||||
|
str strlen end))
|
||||||
|
(new-end (if match (car match) end))
|
||||||
|
(count (if (> new-end end) (1+ count) count)))
|
||||||
|
(if (> new-end end)
|
||||||
|
(push-not-null! body (single-filter (cadr match))))
|
||||||
|
(if (and (> new-end end)
|
||||||
|
#,#'(< count 1))
|
||||||
|
(lp new-end count)
|
||||||
|
(let ((success #,#'(= count 1)))
|
||||||
|
#,#`(if success
|
||||||
|
#f
|
||||||
|
#,(cggr (baf accum) 'cg-body #''() #'at)))))))))))
|
||||||
|
|
||||||
|
;; Association list of functions to handle different expressions as PEGs
|
||||||
|
(define peg-compiler-alist '())
|
||||||
|
|
||||||
|
(define (add-peg-compiler! symbol function)
|
||||||
|
(set! peg-compiler-alist
|
||||||
|
(assq-set! peg-compiler-alist symbol function)))
|
||||||
|
|
||||||
|
(add-peg-compiler! 'range cg-range)
|
||||||
|
(add-peg-compiler! 'ignore cg-ignore)
|
||||||
|
(add-peg-compiler! 'capture cg-capture)
|
||||||
|
(add-peg-compiler! 'and cg-and)
|
||||||
|
(add-peg-compiler! 'or cg-or)
|
||||||
|
(add-peg-compiler! '* cg-*)
|
||||||
|
(add-peg-compiler! '+ cg-+)
|
||||||
|
(add-peg-compiler! '? cg-?)
|
||||||
|
(add-peg-compiler! 'followed-by cg-followed-by)
|
||||||
|
(add-peg-compiler! 'not-followed-by cg-not-followed-by)
|
||||||
|
|
||||||
|
;; Takes an arbitrary expressions and accumulation variable, then parses it.
|
||||||
|
;; E.g.: (compile-peg-pattern syntax '(and "abc" (or "-" (range #\a #\z))) 'all)
|
||||||
|
(define (compile-peg-pattern pat accum)
|
||||||
|
(syntax-case pat (peg-any)
|
||||||
|
(peg-any
|
||||||
|
(cg-peg-any (baf accum)))
|
||||||
|
(sym (identifier? #'sym) ;; nonterminal
|
||||||
|
#'sym)
|
||||||
|
(str (string? (syntax->datum #'str)) ;; literal string
|
||||||
|
(cg-string (syntax->datum #'str) (baf accum)))
|
||||||
|
((name . args) (let* ((nm (syntax->datum #'name))
|
||||||
|
(entry (assq-ref peg-compiler-alist nm)))
|
||||||
|
(if entry
|
||||||
|
(entry #'args accum)
|
||||||
|
(error "Bad peg form" nm #'args
|
||||||
|
"Not one of" (map car peg-compiler-alist)))))))
|
||||||
|
|
||||||
|
;; Packages the results of a parser
|
||||||
|
(define (wrap-parser-for-users for-syntax parser accumsym s-syn)
|
||||||
|
#`(lambda (str strlen at)
|
||||||
|
(let ((res (#,parser str strlen at)))
|
||||||
|
;; Try to match the nonterminal.
|
||||||
|
(if res
|
||||||
|
;; If we matched, do some post-processing to figure out
|
||||||
|
;; what data to propagate upward.
|
||||||
|
(let ((at (car res))
|
||||||
|
(body (cadr res)))
|
||||||
|
#,(cond
|
||||||
|
((eq? accumsym 'name)
|
||||||
|
#`(list at '#,s-syn))
|
||||||
|
((eq? accumsym 'all)
|
||||||
|
#`(list (car res)
|
||||||
|
(cond
|
||||||
|
((not (list? body))
|
||||||
|
(list '#,s-syn body))
|
||||||
|
((null? body) '#,s-syn)
|
||||||
|
((symbol? (car body))
|
||||||
|
(list '#,s-syn body))
|
||||||
|
(else (cons '#,s-syn body)))))
|
||||||
|
((eq? accumsym 'none) #`(list (car res) '()))
|
||||||
|
(else #`(begin res))))
|
||||||
|
;; If we didn't match, just return false.
|
||||||
|
#f))))
|
97
module/mes/peg/simplify-tree.scm
Normal file
97
module/mes/peg/simplify-tree.scm
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
;;;; simplify-tree.scm --- utility functions for the PEG parser
|
||||||
|
;;;;
|
||||||
|
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||||
|
;;;;
|
||||||
|
;;;; This library is free software; you can redistribute it and/or
|
||||||
|
;;;; modify it under the terms of the GNU Lesser General Public
|
||||||
|
;;;; License as published by the Free Software Foundation; either
|
||||||
|
;;;; version 3 of the License, or (at your option) any later version.
|
||||||
|
;;;;
|
||||||
|
;;;; This library is distributed in the hope that it will be useful,
|
||||||
|
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;;;; Lesser General Public License for more details.
|
||||||
|
;;;;
|
||||||
|
;;;; You should have received a copy of the GNU Lesser General Public
|
||||||
|
;;;; License along with this library; if not, write to the Free Software
|
||||||
|
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;;;;
|
||||||
|
|
||||||
|
(define-module (ice-9 peg simplify-tree)
|
||||||
|
#:export (keyword-flatten context-flatten string-collapse)
|
||||||
|
#:use-module (system base pmatch))
|
||||||
|
|
||||||
|
(define-syntax single?
|
||||||
|
(syntax-rules ()
|
||||||
|
;;"Return #t if X is a list of one element."
|
||||||
|
((_ x)
|
||||||
|
(pmatch x
|
||||||
|
((_) #t)
|
||||||
|
(else #f)))))
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;; POST-PROCESSING FUNCTIONS (TO CANONICALIZE MATCH TREES)
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; Is everything in LST true?
|
||||||
|
(define (andlst lst)
|
||||||
|
(or (null? lst)
|
||||||
|
(and (car lst) (andlst (cdr lst)))))
|
||||||
|
|
||||||
|
;; Is LST a list of strings?
|
||||||
|
(define (string-list? lst)
|
||||||
|
(and (list? lst) (not (null? lst))
|
||||||
|
(andlst (map string? lst))))
|
||||||
|
|
||||||
|
;; Groups all strings that are next to each other in LST. Used in
|
||||||
|
;; STRING-COLLAPSE.
|
||||||
|
(define (string-group lst)
|
||||||
|
(if (not (list? lst))
|
||||||
|
lst
|
||||||
|
(if (null? lst)
|
||||||
|
'()
|
||||||
|
(let ((next (string-group (cdr lst))))
|
||||||
|
(if (not (string? (car lst)))
|
||||||
|
(cons (car lst) next)
|
||||||
|
(if (and (not (null? next))
|
||||||
|
(list? (car next))
|
||||||
|
(string? (caar next)))
|
||||||
|
(cons (cons (car lst) (car next)) (cdr next))
|
||||||
|
(cons (list (car lst)) next)))))))
|
||||||
|
|
||||||
|
|
||||||
|
;; Collapses all the string in LST.
|
||||||
|
;; ("a" "b" (c d) "e" "f") -> ("ab" (c d) "ef")
|
||||||
|
(define (string-collapse lst)
|
||||||
|
(if (list? lst)
|
||||||
|
(let ((res (map (lambda (x) (if (string-list? x)
|
||||||
|
(apply string-append x)
|
||||||
|
x))
|
||||||
|
(string-group (map string-collapse lst)))))
|
||||||
|
(if (single? res) (car res) res))
|
||||||
|
lst))
|
||||||
|
|
||||||
|
;; If LST is an atom, return (list LST), else return LST.
|
||||||
|
(define (mklst lst)
|
||||||
|
(if (not (list? lst)) (list lst) lst))
|
||||||
|
|
||||||
|
;; Takes a list and "flattens" it, using the predicate TST to know when to stop
|
||||||
|
;; instead of terminating on atoms (see tutorial).
|
||||||
|
(define (context-flatten tst lst)
|
||||||
|
(if (or (not (list? lst)) (null? lst))
|
||||||
|
lst
|
||||||
|
(if (tst lst)
|
||||||
|
(list lst)
|
||||||
|
(apply append
|
||||||
|
(map (lambda (x) (mklst (context-flatten tst x)))
|
||||||
|
lst)))))
|
||||||
|
|
||||||
|
;; Takes a list and "flattens" it, using the list of keywords KEYWORD-LST to
|
||||||
|
;; know when to stop at (see tutorial).
|
||||||
|
(define (keyword-flatten keyword-lst lst)
|
||||||
|
(context-flatten
|
||||||
|
(lambda (x)
|
||||||
|
(if (or (not (list? x)) (null? x))
|
||||||
|
#t
|
||||||
|
(member (car x) keyword-lst)))
|
||||||
|
lst))
|
273
module/mes/peg/string-peg.scm
Normal file
273
module/mes/peg/string-peg.scm
Normal file
|
@ -0,0 +1,273 @@
|
||||||
|
;;;; string-peg.scm --- representing PEG grammars as strings
|
||||||
|
;;;;
|
||||||
|
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||||
|
;;;;
|
||||||
|
;;;; This library is free software; you can redistribute it and/or
|
||||||
|
;;;; modify it under the terms of the GNU Lesser General Public
|
||||||
|
;;;; License as published by the Free Software Foundation; either
|
||||||
|
;;;; version 3 of the License, or (at your option) any later version.
|
||||||
|
;;;;
|
||||||
|
;;;; This library is distributed in the hope that it will be useful,
|
||||||
|
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;;;; Lesser General Public License for more details.
|
||||||
|
;;;;
|
||||||
|
;;;; You should have received a copy of the GNU Lesser General Public
|
||||||
|
;;;; License along with this library; if not, write to the Free Software
|
||||||
|
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;;;;
|
||||||
|
|
||||||
|
(define-module (ice-9 peg string-peg)
|
||||||
|
#:export (peg-as-peg
|
||||||
|
define-peg-string-patterns
|
||||||
|
peg-grammar)
|
||||||
|
#:use-module (ice-9 peg using-parsers)
|
||||||
|
#:use-module (ice-9 peg codegen)
|
||||||
|
#:use-module (ice-9 peg simplify-tree))
|
||||||
|
|
||||||
|
;; Gets the left-hand depth of a list.
|
||||||
|
(define (depth lst)
|
||||||
|
(if (or (not (list? lst)) (null? lst))
|
||||||
|
0
|
||||||
|
(+ 1 (depth (car lst)))))
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;; Parse string PEGs using sexp PEGs.
|
||||||
|
;; See the variable PEG-AS-PEG for an easier-to-read syntax.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; Grammar for PEGs in PEG grammar.
|
||||||
|
(define peg-as-peg
|
||||||
|
"grammar <-- (nonterminal ('<--' / '<-' / '<') sp pattern)+
|
||||||
|
pattern <-- alternative (SLASH sp alternative)*
|
||||||
|
alternative <-- ([!&]? sp suffix)+
|
||||||
|
suffix <-- primary ([*+?] sp)*
|
||||||
|
primary <-- '(' sp pattern ')' sp / '.' sp / literal / charclass / nonterminal !'<'
|
||||||
|
literal <-- ['] (!['] .)* ['] sp
|
||||||
|
charclass <-- LB (!']' (CCrange / CCsingle))* RB sp
|
||||||
|
CCrange <-- . '-' .
|
||||||
|
CCsingle <-- .
|
||||||
|
nonterminal <-- [a-zA-Z0-9-]+ sp
|
||||||
|
sp < [ \t\n]*
|
||||||
|
SLASH < '/'
|
||||||
|
LB < '['
|
||||||
|
RB < ']'
|
||||||
|
")
|
||||||
|
|
||||||
|
(define-syntax define-sexp-parser
|
||||||
|
(lambda (x)
|
||||||
|
(syntax-case x ()
|
||||||
|
((_ sym accum pat)
|
||||||
|
(let* ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
|
||||||
|
(accumsym (syntax->datum #'accum))
|
||||||
|
(syn (wrap-parser-for-users x matchf accumsym #'sym)))
|
||||||
|
#`(define sym #,syn))))))
|
||||||
|
|
||||||
|
(define-sexp-parser peg-grammar all
|
||||||
|
(+ (and peg-nonterminal (or "<--" "<-" "<") peg-sp peg-pattern)))
|
||||||
|
(define-sexp-parser peg-pattern all
|
||||||
|
(and peg-alternative
|
||||||
|
(* (and (ignore "/") peg-sp peg-alternative))))
|
||||||
|
(define-sexp-parser peg-alternative all
|
||||||
|
(+ (and (? (or "!" "&")) peg-sp peg-suffix)))
|
||||||
|
(define-sexp-parser peg-suffix all
|
||||||
|
(and peg-primary (* (and (or "*" "+" "?") peg-sp))))
|
||||||
|
(define-sexp-parser peg-primary all
|
||||||
|
(or (and "(" peg-sp peg-pattern ")" peg-sp)
|
||||||
|
(and "." peg-sp)
|
||||||
|
peg-literal
|
||||||
|
peg-charclass
|
||||||
|
(and peg-nonterminal (not-followed-by "<"))))
|
||||||
|
(define-sexp-parser peg-literal all
|
||||||
|
(and "'" (* (and (not-followed-by "'") peg-any)) "'" peg-sp))
|
||||||
|
(define-sexp-parser peg-charclass all
|
||||||
|
(and (ignore "[")
|
||||||
|
(* (and (not-followed-by "]")
|
||||||
|
(or charclass-range charclass-single)))
|
||||||
|
(ignore "]")
|
||||||
|
peg-sp))
|
||||||
|
(define-sexp-parser charclass-range all (and peg-any "-" peg-any))
|
||||||
|
(define-sexp-parser charclass-single all peg-any)
|
||||||
|
(define-sexp-parser peg-nonterminal all
|
||||||
|
(and (+ (or (range #\a #\z) (range #\A #\Z) (range #\0 #\9) "-")) peg-sp))
|
||||||
|
(define-sexp-parser peg-sp none
|
||||||
|
(* (or " " "\t" "\n")))
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;; PARSE STRING PEGS
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; Takes a string representing a PEG grammar and returns syntax that
|
||||||
|
;; will define all of the nonterminals in the grammar with equivalent
|
||||||
|
;; PEG s-expressions.
|
||||||
|
(define (peg-parser str for-syntax)
|
||||||
|
(let ((parsed (match-pattern peg-grammar str)))
|
||||||
|
(if (not parsed)
|
||||||
|
(begin
|
||||||
|
;; (display "Invalid PEG grammar!\n")
|
||||||
|
#f)
|
||||||
|
(let ((lst (peg:tree parsed)))
|
||||||
|
(cond
|
||||||
|
((or (not (list? lst)) (null? lst))
|
||||||
|
lst)
|
||||||
|
((eq? (car lst) 'peg-grammar)
|
||||||
|
#`(begin
|
||||||
|
#,@(map (lambda (x) (peg-nonterm->defn x for-syntax))
|
||||||
|
(context-flatten (lambda (lst) (<= (depth lst) 2))
|
||||||
|
(cdr lst))))))))))
|
||||||
|
|
||||||
|
;; Macro wrapper for PEG-PARSER. Parses PEG grammars expressed as strings and
|
||||||
|
;; defines all the appropriate nonterminals.
|
||||||
|
(define-syntax define-peg-string-patterns
|
||||||
|
(lambda (x)
|
||||||
|
(syntax-case x ()
|
||||||
|
((_ str)
|
||||||
|
(peg-parser (syntax->datum #'str) x)))))
|
||||||
|
|
||||||
|
;; lst has format (nonterm grabber pattern), where
|
||||||
|
;; nonterm is a symbol (the name of the nonterminal),
|
||||||
|
;; grabber is a string (either "<", "<-" or "<--"), and
|
||||||
|
;; pattern is the parse of a PEG pattern expressed as as string.
|
||||||
|
(define (peg-nonterm->defn lst for-syntax)
|
||||||
|
(let* ((nonterm (car lst))
|
||||||
|
(grabber (cadr lst))
|
||||||
|
(pattern (caddr lst))
|
||||||
|
(nonterm-name (datum->syntax for-syntax
|
||||||
|
(string->symbol (cadr nonterm)))))
|
||||||
|
#`(define-peg-pattern #,nonterm-name
|
||||||
|
#,(cond
|
||||||
|
((string=? grabber "<--") (datum->syntax for-syntax 'all))
|
||||||
|
((string=? grabber "<-") (datum->syntax for-syntax 'body))
|
||||||
|
(else (datum->syntax for-syntax 'none)))
|
||||||
|
#,(compressor (peg-pattern->defn pattern for-syntax) for-syntax))))
|
||||||
|
|
||||||
|
;; lst has format ('peg-pattern ...).
|
||||||
|
;; After the context-flatten, (cdr lst) has format
|
||||||
|
;; (('peg-alternative ...) ...), where the outer list is a collection
|
||||||
|
;; of elements from a '/' alternative.
|
||||||
|
(define (peg-pattern->defn lst for-syntax)
|
||||||
|
#`(or #,@(map (lambda (x) (peg-alternative->defn x for-syntax))
|
||||||
|
(context-flatten (lambda (x) (eq? (car x) 'peg-alternative))
|
||||||
|
(cdr lst)))))
|
||||||
|
|
||||||
|
;; lst has format ('peg-alternative ...).
|
||||||
|
;; After the context-flatten, (cdr lst) has the format
|
||||||
|
;; (item ...), where each item has format either ("!" ...), ("&" ...),
|
||||||
|
;; or ('peg-suffix ...).
|
||||||
|
(define (peg-alternative->defn lst for-syntax)
|
||||||
|
#`(and #,@(map (lambda (x) (peg-body->defn x for-syntax))
|
||||||
|
(context-flatten (lambda (x) (or (string? (car x))
|
||||||
|
(eq? (car x) 'peg-suffix)))
|
||||||
|
(cdr lst)))))
|
||||||
|
|
||||||
|
;; lst has the format either
|
||||||
|
;; ("!" ('peg-suffix ...)), ("&" ('peg-suffix ...)), or
|
||||||
|
;; ('peg-suffix ...).
|
||||||
|
(define (peg-body->defn lst for-syntax)
|
||||||
|
(cond
|
||||||
|
((equal? (car lst) "&")
|
||||||
|
#`(followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
|
||||||
|
((equal? (car lst) "!")
|
||||||
|
#`(not-followed-by #,(peg-suffix->defn (cadr lst) for-syntax)))
|
||||||
|
((eq? (car lst) 'peg-suffix)
|
||||||
|
(peg-suffix->defn lst for-syntax))
|
||||||
|
(else `(peg-parse-body-fail ,lst))))
|
||||||
|
|
||||||
|
;; lst has format ('peg-suffix <peg-primary> (? (/ "*" "?" "+")))
|
||||||
|
(define (peg-suffix->defn lst for-syntax)
|
||||||
|
(let ((inner-defn (peg-primary->defn (cadr lst) for-syntax)))
|
||||||
|
(cond
|
||||||
|
((null? (cddr lst))
|
||||||
|
inner-defn)
|
||||||
|
((equal? (caddr lst) "*")
|
||||||
|
#`(* #,inner-defn))
|
||||||
|
((equal? (caddr lst) "?")
|
||||||
|
#`(? #,inner-defn))
|
||||||
|
((equal? (caddr lst) "+")
|
||||||
|
#`(+ #,inner-defn)))))
|
||||||
|
|
||||||
|
;; Parse a primary.
|
||||||
|
(define (peg-primary->defn lst for-syntax)
|
||||||
|
(let ((el (cadr lst)))
|
||||||
|
(cond
|
||||||
|
((list? el)
|
||||||
|
(cond
|
||||||
|
((eq? (car el) 'peg-literal)
|
||||||
|
(peg-literal->defn el for-syntax))
|
||||||
|
((eq? (car el) 'peg-charclass)
|
||||||
|
(peg-charclass->defn el for-syntax))
|
||||||
|
((eq? (car el) 'peg-nonterminal)
|
||||||
|
(datum->syntax for-syntax (string->symbol (cadr el))))))
|
||||||
|
((string? el)
|
||||||
|
(cond
|
||||||
|
((equal? el "(")
|
||||||
|
(peg-pattern->defn (caddr lst) for-syntax))
|
||||||
|
((equal? el ".")
|
||||||
|
(datum->syntax for-syntax 'peg-any))
|
||||||
|
(else (datum->syntax for-syntax
|
||||||
|
`(peg-parse-any unknown-string ,lst)))))
|
||||||
|
(else (datum->syntax for-syntax
|
||||||
|
`(peg-parse-any unknown-el ,lst))))))
|
||||||
|
|
||||||
|
;; Trims characters off the front and end of STR.
|
||||||
|
;; (trim-1chars "'ab'") -> "ab"
|
||||||
|
(define (trim-1chars str) (substring str 1 (- (string-length str) 1)))
|
||||||
|
|
||||||
|
;; Parses a literal.
|
||||||
|
(define (peg-literal->defn lst for-syntax)
|
||||||
|
(datum->syntax for-syntax (trim-1chars (cadr lst))))
|
||||||
|
|
||||||
|
;; Parses a charclass.
|
||||||
|
(define (peg-charclass->defn lst for-syntax)
|
||||||
|
#`(or
|
||||||
|
#,@(map
|
||||||
|
(lambda (cc)
|
||||||
|
(cond
|
||||||
|
((eq? (car cc) 'charclass-range)
|
||||||
|
#`(range #,(datum->syntax
|
||||||
|
for-syntax
|
||||||
|
(string-ref (cadr cc) 0))
|
||||||
|
#,(datum->syntax
|
||||||
|
for-syntax
|
||||||
|
(string-ref (cadr cc) 2))))
|
||||||
|
((eq? (car cc) 'charclass-single)
|
||||||
|
(datum->syntax for-syntax (cadr cc)))))
|
||||||
|
(context-flatten
|
||||||
|
(lambda (x) (or (eq? (car x) 'charclass-range)
|
||||||
|
(eq? (car x) 'charclass-single)))
|
||||||
|
(cdr lst)))))
|
||||||
|
|
||||||
|
;; Compresses a list to save the optimizer work.
|
||||||
|
;; e.g. (or (and a)) -> a
|
||||||
|
(define (compressor-core lst)
|
||||||
|
(if (or (not (list? lst)) (null? lst))
|
||||||
|
lst
|
||||||
|
(cond
|
||||||
|
((and (or (eq? (car lst) 'or) (eq? (car lst) 'and))
|
||||||
|
(null? (cddr lst)))
|
||||||
|
(compressor-core (cadr lst)))
|
||||||
|
((and (eq? (car lst) 'body)
|
||||||
|
(eq? (cadr lst) 'lit)
|
||||||
|
(eq? (cadddr lst) 1))
|
||||||
|
(compressor-core (caddr lst)))
|
||||||
|
(else (map compressor-core lst)))))
|
||||||
|
|
||||||
|
(define (compressor syn for-syntax)
|
||||||
|
(datum->syntax for-syntax
|
||||||
|
(compressor-core (syntax->datum syn))))
|
||||||
|
|
||||||
|
;; Builds a lambda-expressions for the pattern STR using accum.
|
||||||
|
(define (peg-string-compile args accum)
|
||||||
|
(syntax-case args ()
|
||||||
|
((str-stx) (string? (syntax->datum #'str-stx))
|
||||||
|
(let ((string (syntax->datum #'str-stx)))
|
||||||
|
(compile-peg-pattern
|
||||||
|
(compressor
|
||||||
|
(peg-pattern->defn
|
||||||
|
(peg:tree (match-pattern peg-pattern string)) #'str-stx)
|
||||||
|
#'str-stx)
|
||||||
|
(if (eq? accum 'all) 'body accum))))
|
||||||
|
(else (error "Bad embedded PEG string" args))))
|
||||||
|
|
||||||
|
(add-peg-compiler! 'peg peg-string-compile)
|
||||||
|
|
115
module/mes/peg/using-parsers.scm
Normal file
115
module/mes/peg/using-parsers.scm
Normal file
|
@ -0,0 +1,115 @@
|
||||||
|
;;;; using-parsers.scm --- utilities to make using parsers easier
|
||||||
|
;;;;
|
||||||
|
;;;; Copyright (C) 2010, 2011 Free Software Foundation, Inc.
|
||||||
|
;;;;
|
||||||
|
;;;; This library is free software; you can redistribute it and/or
|
||||||
|
;;;; modify it under the terms of the GNU Lesser General Public
|
||||||
|
;;;; License as published by the Free Software Foundation; either
|
||||||
|
;;;; version 3 of the License, or (at your option) any later version.
|
||||||
|
;;;;
|
||||||
|
;;;; This library is distributed in the hope that it will be useful,
|
||||||
|
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
;;;; Lesser General Public License for more details.
|
||||||
|
;;;;
|
||||||
|
;;;; You should have received a copy of the GNU Lesser General Public
|
||||||
|
;;;; License along with this library; if not, write to the Free Software
|
||||||
|
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
;;;;
|
||||||
|
|
||||||
|
(define-module (ice-9 peg using-parsers)
|
||||||
|
#:use-module (ice-9 peg simplify-tree)
|
||||||
|
#:use-module (ice-9 peg codegen)
|
||||||
|
#:use-module (ice-9 peg cache)
|
||||||
|
#:export (match-pattern define-peg-pattern search-for-pattern
|
||||||
|
prec make-prec peg:start peg:end peg:string
|
||||||
|
peg:tree peg:substring peg-record?))
|
||||||
|
|
||||||
|
;;;
|
||||||
|
;;; Helper Macros
|
||||||
|
;;;
|
||||||
|
|
||||||
|
(define-syntax until
|
||||||
|
(syntax-rules ()
|
||||||
|
;;"Evaluate TEST. If it is true, return its value. Otherwise,execute the STMTs and try again."
|
||||||
|
((_ test stmt stmt* ...)
|
||||||
|
(let lp ()
|
||||||
|
(or test
|
||||||
|
(begin stmt stmt* ... (lp)))))))
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;; FOR DEFINING AND USING NONTERMINALS
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; Parses STRING using NONTERM
|
||||||
|
(define (match-pattern nonterm string)
|
||||||
|
;; We copy the string before using it because it might have been modified
|
||||||
|
;; in-place since the last time it was parsed, which would invalidate the
|
||||||
|
;; cache. Guile uses copy-on-write for strings, so this is fast.
|
||||||
|
(let ((res (nonterm (string-copy string) (string-length string) 0)))
|
||||||
|
(if (not res)
|
||||||
|
#f
|
||||||
|
(make-prec 0 (car res) string (string-collapse (cadr res))))))
|
||||||
|
|
||||||
|
;; Defines a new nonterminal symbol accumulating with ACCUM.
|
||||||
|
(define-syntax define-peg-pattern
|
||||||
|
(lambda (x)
|
||||||
|
(syntax-case x ()
|
||||||
|
((_ sym accum pat)
|
||||||
|
(let ((matchf (compile-peg-pattern #'pat (syntax->datum #'accum)))
|
||||||
|
(accumsym (syntax->datum #'accum)))
|
||||||
|
;; CODE is the code to parse the string if the result isn't cached.
|
||||||
|
(let ((syn (wrap-parser-for-users x matchf accumsym #'sym)))
|
||||||
|
#`(define sym #,(cg-cached-parser syn))))))))
|
||||||
|
|
||||||
|
(define (peg-like->peg pat)
|
||||||
|
(syntax-case pat ()
|
||||||
|
(str (string? (syntax->datum #'str)) #'(peg str))
|
||||||
|
(else pat)))
|
||||||
|
|
||||||
|
;; Searches through STRING for something that parses to PEG-MATCHER. Think
|
||||||
|
;; regexp search.
|
||||||
|
(define-syntax search-for-pattern
|
||||||
|
(lambda (x)
|
||||||
|
(syntax-case x ()
|
||||||
|
((_ pattern string-uncopied)
|
||||||
|
(let ((pmsym (syntax->datum #'pattern)))
|
||||||
|
(let ((matcher (compile-peg-pattern (peg-like->peg #'pattern) 'body)))
|
||||||
|
;; We copy the string before using it because it might have been
|
||||||
|
;; modified in-place since the last time it was parsed, which would
|
||||||
|
;; invalidate the cache. Guile uses copy-on-write for strings, so
|
||||||
|
;; this is fast.
|
||||||
|
#`(let ((string (string-copy string-uncopied))
|
||||||
|
(strlen (string-length string-uncopied))
|
||||||
|
(at 0))
|
||||||
|
(let ((ret (until (or (>= at strlen)
|
||||||
|
(#,matcher string strlen at))
|
||||||
|
(set! at (+ at 1)))))
|
||||||
|
(if (eq? ret #t) ;; (>= at strlen) succeeded
|
||||||
|
#f
|
||||||
|
(let ((end (car ret))
|
||||||
|
(match (cadr ret)))
|
||||||
|
(make-prec
|
||||||
|
at end string
|
||||||
|
(string-collapse match))))))))))))
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;;;;; PMATCH STRUCTURE MUNGING
|
||||||
|
;; Pretty self-explanatory.
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
(define prec
|
||||||
|
(make-record-type "peg" '(start end string tree)))
|
||||||
|
(define make-prec
|
||||||
|
(record-constructor prec '(start end string tree)))
|
||||||
|
(define (peg:start pm)
|
||||||
|
(if pm ((record-accessor prec 'start) pm) #f))
|
||||||
|
(define (peg:end pm)
|
||||||
|
(if pm ((record-accessor prec 'end) pm) #f))
|
||||||
|
(define (peg:string pm)
|
||||||
|
(if pm ((record-accessor prec 'string) pm) #f))
|
||||||
|
(define (peg:tree pm)
|
||||||
|
(if pm ((record-accessor prec 'tree) pm) #f))
|
||||||
|
(define (peg:substring pm)
|
||||||
|
(if pm (substring (peg:string pm) (peg:start pm) (peg:end pm)) #f))
|
||||||
|
(define peg-record? (record-predicate prec))
|
72
tests/peg.test
Executable file
72
tests/peg.test
Executable file
|
@ -0,0 +1,72 @@
|
||||||
|
#! /bin/sh
|
||||||
|
# -*-scheme-*-
|
||||||
|
MES_ARENA=${MES_ARENA-10000000}
|
||||||
|
export MES_ARENA
|
||||||
|
echo ' ()' | cat $(dirname $0)/../module/mes/base-0.mes $0 /dev/stdin | $(dirname $0)/../scripts/mes $MES_FLAGS $MES_FLAGS"$@"
|
||||||
|
#paredit:||
|
||||||
|
exit $?
|
||||||
|
!#
|
||||||
|
|
||||||
|
;;; -*-scheme-*-
|
||||||
|
|
||||||
|
;;; Mes --- Maxwell Equations of Software
|
||||||
|
;;; Copyright © 2016 Jan Nieuwenhuizen <janneke@gnu.org>
|
||||||
|
;;;
|
||||||
|
;;; This file is part of Mes.
|
||||||
|
;;;
|
||||||
|
;;; Mes is free software; you can redistribute it and/or modify it
|
||||||
|
;;; under the terms of the GNU General Public License as published by
|
||||||
|
;;; the Free Software Foundation; either version 3 of the License, or (at
|
||||||
|
;;; your option) any later version.
|
||||||
|
;;;
|
||||||
|
;;; Mes is distributed in the hope that it will be useful, but
|
||||||
|
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
;;; GNU General Public License for more details.
|
||||||
|
;;;
|
||||||
|
;;; You should have received a copy of the GNU General Public License
|
||||||
|
;;; along with Mes. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
|
||||||
|
|
||||||
|
(cond-expand
|
||||||
|
(guile-2.2
|
||||||
|
(use-modules (ice-9 peg)))
|
||||||
|
(guile
|
||||||
|
(use-modules (ice-9 syncase))
|
||||||
|
(display "guile 2.0: no PEG\n" (current-error-port))
|
||||||
|
(exit 0))
|
||||||
|
(mes
|
||||||
|
(mes-use-module (mes peg))
|
||||||
|
(mes-use-module (mes test))))
|
||||||
|
|
||||||
|
(pass-if "first dummy" #t)
|
||||||
|
(pass-if-not "second dummy" #f)
|
||||||
|
|
||||||
|
(define *etc-passwd*
|
||||||
|
"root:x:0:0:root:/root:/bin/bash
|
||||||
|
daemon:x:1:1:daemon:/usr/sbin:/bin/sh
|
||||||
|
bin:x:2:2:bin:/bin:/bin/sh
|
||||||
|
sys:x:3:3:sys:/dev:/bin/sh
|
||||||
|
nobody:x:65534:65534:nobody:/nonexistent:/bin/sh
|
||||||
|
messagebus:x:103:107::/var/run/dbus:/bin/false")
|
||||||
|
|
||||||
|
(define-peg-string-patterns
|
||||||
|
"string-passwd <- entry* !.
|
||||||
|
entry <-- (! NL .)* NL*
|
||||||
|
NL < '\n'")
|
||||||
|
|
||||||
|
(pass-if-equal "peg-tree"
|
||||||
|
(map (lambda (x) (list 'entry x)) (string-split *etc-passwd* #\newline))
|
||||||
|
(peg:tree (match-pattern string-passwd *etc-passwd*)))
|
||||||
|
|
||||||
|
(define-peg-pattern passwd body (and (* entry) (not-followed-by peg-any)))
|
||||||
|
(define-peg-pattern entry all (and (* (and (not-followed-by NL) peg-any))
|
||||||
|
(* NL)))
|
||||||
|
(define-peg-pattern NL none "\n")
|
||||||
|
(define-peg-pattern passwd body (peg "entry* !."))
|
||||||
|
|
||||||
|
(pass-if-equal "peg-tree"
|
||||||
|
(map (lambda (x) (list 'entry x)) (string-split *etc-passwd* #\newline))
|
||||||
|
(peg:tree (match-pattern passwd *etc-passwd*)))
|
||||||
|
|
||||||
|
(result 'report)
|
Loading…
Reference in a new issue