changeset 212:32080f96c3a0

Implement matchOne matching macro. Support more AST node types in zeroPlus matching macro.
author Mike Pavone <pavone@retrodev.com>
date Sat, 30 Nov 2013 15:05:24 -0800
parents 53cd9c3bcf96
children e00a8bc6361b
files modules/parser.tp
diffstat 1 files changed, 146 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/modules/parser.tp	Sat Nov 30 15:04:52 2013 -0800
+++ b/modules/parser.tp	Sat Nov 30 15:05:24 2013 -0800
@@ -2,6 +2,100 @@
 	_applyMatch <- :fun tomatch {
 		fun: tomatch
 	}
+	_matchString <- :str tomatch {
+		if: (tomatch isString?) {
+			if: (tomatch length) < (str length) {
+				#{
+					matched? <- { false }
+				}
+			} else: {
+				if: (tomatch length) > (str length) {
+					tomatch <- tomatch from: 0 withLength: (str length)
+				}
+				if: str = tomatch {
+					#{
+						matched? <- { true }
+						matchlen <- { str length }
+					}
+				} else: {
+					#{
+						matched? <- { false }
+					}
+				}
+			}
+		} else: {
+			#{
+				matched? <- { false }
+			}
+		}
+	}
+	ifmatch:else <- :matchres :elseblock {
+		if: (matchres matched?) {
+			matchres
+		} else: {
+			elseblock:
+		}
+	}
+	_makeMatchCall <- :matchexpr {
+		if: (matchexpr nodeType) = "lambda" {
+			#{
+				valid? <- { true }
+				matchcall <- quote: (_applyMatch: matchexpr tomatch)
+			}
+		} else: {
+			if: (matchexpr nodeType) = "symbol" {
+				#{
+					valid? <- { true }
+					matchcall <- quote: (matchexpr: tomatch)
+				}
+			} else: {
+				if: (matchexpr nodeType) = "strlit" {
+					#{
+						valid? <- { true }
+						matchcall <- quote: (_matchString: matchexpr tomatch)
+					}
+				} else: {
+					if: (matchexpr nodeType) = "op" {
+						if: (matchexpr opName) = "." {
+							left <- (_makeMatchCall: (matchexpr left)) matchcall
+							right <- (_makeMatchCall: (matchexpr right)) matchcall
+							#{
+								valid? <- { true }
+								matchcall <- quote: (_applyMatch: :tomatch {
+									lm <- left
+									if: (lm matched?) {
+										tomatch <- tomatch from: (lm matchlen)
+										rm <- right
+										if: (rm matched?) {
+											total <- (rm matchlen) + (lm matchlen)
+											#{
+												matched? <- { true }
+												matchlen <- { total }
+											}
+										} else: {
+											rm
+										}
+									} else: {
+										lm
+									}
+								} tomatch)
+							}
+						} else: {
+							#{
+								valid? <- { false }
+								message <- "Unsupported operator " . (matchexpr opName)
+							}
+						}
+					} else: {
+						#{
+							valid? <- { false }
+							message <- "Unsupported AST node type " . (matchexpr nodeType)
+						}
+					}
+				}
+			}
+		}
+	}
 	expandClass <- :chars {
 		if: (chars length) > 0 {
 			pos <- 0
@@ -49,7 +143,19 @@
 				out <- ""
 				cur <- 0
 				while: { cur < 256 } do: {
-					out <- out . (cur asStringChar)
+					notfound <- true
+					idx <- 0
+					len <- (old length)
+					while: { notfound && idx < len } do: {
+						if: cur = (old byte: idx) {
+							notfound <- false
+						} else: {
+							idx <- idx + 1
+						}
+					}
+					if: notfound {
+						out <- out . (cur asStringChar)
+					}
 					cur <- cur + 1
 				}
 			}
@@ -60,6 +166,7 @@
 	}
 	charClass <- macro: :rawchars {
 		eval: rawchars :chars {
+			orig <- chars
 			chars <- expandClass: chars
 			//TODO: Use a more sophisticated approach for large classes
 			quote: :tomatch {
@@ -90,30 +197,23 @@
 				}
 			}
 		} else: {
-			print: "uh oh"
+			print: "#error Argument to charClass macro must be a compile-time constant\n"
 		}
 	}
 
 	zeroPlus <- macro: :matchexpr {
 		funexpr <- false
 		valid <- false
-		matchcall <- if: (matchexpr nodeType) = "lambda" {
-			valid <- true
-			quote: (_applyMatch: matchexpr tomatch)
-		} else: {
-			if: (matchexpr nodeType) = "symbol" {
-				valid <- true
-				quote: (matchexpr: tomatch)
-			}
-		}
-		if: valid {
+		mc <- _makeMatchCall: matchexpr
+		if: (mc valid?) {
+			mcall <- mc matchcall
 			quote: :tomatch {
 				cur <- 0
 				n <- tomatch byte_length
 				orig <- tomatch
 				match <- true
 				while: { match && cur < n } do: {
-					res <- matchcall
+					res <- mcall
 					match <- res matched?
 					if: match {
 						//TODO: Use some kind of lightweight substring wrapper here
@@ -133,7 +233,27 @@
 				}
 			}
 		} else: {
-			print: "#error Invalid zeroPlus macro call\n"
+			print: "#error Invalid zeroPlus macro call: " . (mc message) . "\n"
+		}
+	}
+
+	matchOne <- macro: :options {
+		options <- (options value) map: :option {
+			_makeMatchCall: option
+		}
+		body <- options foldr: (quote: #{
+			matched? <- { false }
+		}) with: :acc el {
+			if: (el valid?) {
+				mcall <- el matchcall
+				quote: (ifmatch: mcall else: { acc })
+			} else: {
+				print: "#error Invalid matchOne macro call: " . (el message) . "\n"
+				acc
+			}
+		}
+		quote: :tomatch {
+			body
 		}
 	}
 
@@ -141,6 +261,11 @@
 	_alpha <- charClass: "a-zA-Z"
 	alpha <- zeroPlus: _alpha
 	alphaNum <- zeroPlus: (charClass: "a-zA-Z0-9")
+	hws <- zeroPlus: (matchOne: [
+		(charClass: " \t")
+		"/*" . (zeroPlus: (matchOne: [(charClass: "^*") "*" . (charClass: "^/")])) . "*/"
+	])
+
 
 	main <- {
 		cmatch <- alpha: "czx0123"
@@ -161,5 +286,12 @@
 		} else: {
 			print: "01234 didn't match\n"
 		}
+		stuff <- " \t/* blah blah blah * blah */ foo"
+		hwsmatch <- hws: stuff
+		if: (hwsmatch matched?) {
+			print: "'" . (stuff from: (hwsmatch matchlen)) . "' found after hws\n"
+		} else: {
+			print: stuff . " did not match hws rule\n"
+		}
 	}
 }