view modules/parser.tp @ 212:32080f96c3a0

Implement matchOne matching macro. Support more AST node types in zeroPlus matching macro.
author Mike Pavone <pavone@retrodev.com>
date Sat, 30 Nov 2013 15:05:24 -0800
parents 4b3b57f39f10
children e00a8bc6361b
line wrap: on
line source

#{
	// Call the matcher `fun` with the input `tomatch` and return its result.
	// The match calls generated by _makeMatchCall use this to apply a lambda
	// matcher to the current input.
	_applyMatch <- :fun tomatch {
		fun: tomatch
	}
	// Prefix-match the literal `str` against the input `tomatch`.
	// The returned object always answers matched?; when that is true it also
	// answers matchlen, which is (str length). Input that is not a string
	// never matches.
	_matchString <- :str tomatch {
		if: (tomatch isString?) {
			if: (str length) > (tomatch length) {
				// The input is too short to contain the literal
				#{
					matched? <- { false }
				}
			} else: {
				// Only the leading (str length) part of the input is compared
				prefix <- tomatch
				if: (str length) < (tomatch length) {
					prefix <- tomatch from: 0 withLength: (str length)
				}
				if: str = prefix {
					#{
						matched? <- { true }
						matchlen <- { str length }
					}
				} else: {
					#{
						matched? <- { false }
					}
				}
			}
		} else: {
			#{
				matched? <- { false }
			}
		}
	}
	// Return `matchres` unchanged when it reports a match; otherwise evaluate
	// `elseblock` and return its result. matchOne chains its alternatives
	// with this, so a later alternative only runs when the earlier ones fail.
	ifmatch:else <- :matchres :elseblock {
		if: (matchres matched?) {
			matchres
		} else: {
			elseblock:
		}
	}
	// Translate a match-expression AST node into a quoted call that runs the
	// matcher against a variable named `tomatch` at the expansion site.
	// Returns an object answering valid?. When valid it also carries
	// matchcall (the generated AST); otherwise it carries message (the reason
	// the expression was rejected).
	// Supported node types:
	//   lambda  - applied to tomatch through _applyMatch
	//   symbol  - called with tomatch as its argument
	//   strlit  - prefix-matched through _matchString
	//   op "."  - sequence: the left operand must match, then the right
	//             operand is matched against the input that follows it
	_makeMatchCall <- :matchexpr {
		if: (matchexpr nodeType) = "lambda" {
			#{
				valid? <- { true }
				matchcall <- quote: (_applyMatch: matchexpr tomatch)
			}
		} else: {
			if: (matchexpr nodeType) = "symbol" {
				#{
					valid? <- { true }
					matchcall <- quote: (matchexpr: tomatch)
				}
			} else: {
				if: (matchexpr nodeType) = "strlit" {
					#{
						valid? <- { true }
						matchcall <- quote: (_matchString: matchexpr tomatch)
					}
				} else: {
					if: (matchexpr nodeType) = "op" {
						if: (matchexpr opName) = "." {
							// Translate both operands first. If either one is
							// unsupported, hand its result back unchanged so
							// the calling macro can report the message rather
							// than failing on a missing matchcall.
							leftres <- _makeMatchCall: (matchexpr left)
							rightres <- _makeMatchCall: (matchexpr right)
							if: (leftres valid?) {
								if: (rightres valid?) {
									left <- leftres matchcall
									right <- rightres matchcall
									// The generated lambda rebinds tomatch to
									// the remaining input before running the
									// right operand, and reports the combined
									// length when both operands match.
									#{
										valid? <- { true }
										matchcall <- quote: (_applyMatch: :tomatch {
											lm <- left
											if: (lm matched?) {
												tomatch <- tomatch from: (lm matchlen)
												rm <- right
												if: (rm matched?) {
													total <- (rm matchlen) + (lm matchlen)
													#{
														matched? <- { true }
														matchlen <- { total }
													}
												} else: {
													rm
												}
											} else: {
												lm
											}
										} tomatch)
									}
								} else: {
									rightres
								}
							} else: {
								leftres
							}
						} else: {
							#{
								valid? <- { false }
								message <- "Unsupported operator " . (matchexpr opName)
							}
						}
					} else: {
						#{
							valid? <- { false }
							message <- "Unsupported AST node type " . (matchexpr nodeType)
						}
					}
				}
			}
		}
	}
	// Expand a character-class specification into a string that contains
	// every member byte of the class.
	//   - a leading "^" inverts the class: the result then holds each byte
	//     value from 0 to 255 that is NOT listed
	//   - "x-y" denotes an inclusive byte range; reversed bounds are swapped
	//   - a "-" that comes first, directly follows a range, or is the final
	//     character of the specification is taken literally
	// An empty specification yields "".
	expandClass <- :chars {
		if: (chars byte_length) > 0 {
			pos <- 0
			inverted <- false
			if: (chars byte: 0) = ("^" byte: 0) {
				pos <- 1
				inverted <- true
			}
			// state_begin:    next character is always taken literally
			// state_normal:   next character is a literal, or "-" opening a range
			// state_rangeend: next character is the upper bound of a range
			state_begin <- 0
			state_normal <- 1
			state_rangeend <- 2
			state <- state_begin
			out <- ""
			while: { pos < (chars byte_length)} do: {
				if: state = state_begin {
					out <- out . (chars from: pos withLength: 1)
					state <- state_normal
				} else: {
					if: state = state_normal {
						if: (chars byte: pos) = ("-" byte: 0) {
							state <- state_rangeend
						} else: {
							out <- out . (chars from: pos withLength: 1)
						}
					} else: {
						// The last byte emitted so far is the range's lower bound
						rangestart <- out byte: ((out byte_length) - 1)
						rangeend <- chars byte: pos
						if: rangeend < rangestart {
							tmp <- rangeend
							rangeend <- rangestart
							rangestart <- tmp
						}
						// Drop the lower bound; the loop below emits it again.
						// byte_length is used because the offsets here are
						// byte positions, like every other index in this function.
						out <- out from: 0 withLength: ((out byte_length) - 1)
						while: { rangestart <= rangeend } do: {
							out <- out . (rangestart asStringChar)
							rangestart <- rangestart + 1
						}
						state <- state_begin
					}
				}
				pos <- pos + 1
			}
			// A "-" with nothing after it cannot open a range, so keep it as
			// a literal member instead of silently discarding it.
			if: state = state_rangeend {
				out <- out . "-"
			}
			if: inverted {
				old <- out
				out <- ""
				// Loop-invariant: number of bytes listed in the original class
				len <- old byte_length
				cur <- 0
				while: { cur < 256 } do: {
					notfound <- true
					idx <- 0
					while: { notfound && idx < len } do: {
						if: cur = (old byte: idx) {
							notfound <- false
						} else: {
							idx <- idx + 1
						}
					}
					if: notfound {
						out <- out . (cur asStringChar)
					}
					cur <- cur + 1
				}
			}
			out
		} else: {
			""
		}
	}
	// Macro: build a matcher for a single byte drawn from a character class.
	// `rawchars` must evaluate to a compile-time constant class specification
	// in the syntax accepted by expandClass; otherwise a "#error" line is
	// printed during expansion.
	// The generated matcher reports matched? true with matchlen 1 when the
	// first byte of its input belongs to the class. Empty input and input
	// that is not a string report matched? false.
	charClass <- macro: :rawchars {
		eval: rawchars :chars {
			chars <- expandClass: chars
			//TODO: Use a more sophisticated approach for large classes
			quote: :tomatch {
				nomatch <- true
				if: (tomatch isString?) {
					// Empty input has no first byte to test. Sequencing with
					// "." can pass on an empty remainder, so guard before
					// reading byte 0.
					if: (tomatch byte_length) > 0 {
						check <- 0
						while: { nomatch && check < (chars byte_length) } do: {
							if: (tomatch byte: 0) = (chars byte: check) {
								nomatch <- false
							}
							check <- check + 1
						}
					}
				}
				if: nomatch {
					#{
						matched? <- { false }
					}
				} else: {
					#{
						matched? <- { true }
						matchlen <- { 1 }
					}
				}
			}
		} else: {
			print: "#error Argument to charClass macro must be a compile-time constant\n"
		}
	}

	// Macro: build a matcher that applies `matchexpr` repeatedly to the front
	// of its input, dropping what each successful attempt matched, until an
	// attempt fails or the input is used up.
	// The generated matcher reports matched? true, with matchlen equal to the
	// total length consumed, only when something was consumed; otherwise it
	// reports matched? false.
	// NOTE(review): despite the name, zero repetitions are reported as a
	// failed match - confirm that this is intended.
	// NOTE(review): a sub-matcher that succeeds with matchlen 0 would never
	// advance the loop below - confirm no caller can produce one.
	// A matchexpr that _makeMatchCall rejects prints a "#error" line during
	// expansion.
	zeroPlus <- macro: :matchexpr {
		mc <- _makeMatchCall: matchexpr
		if: (mc valid?) {
			mcall <- mc matchcall
			quote: :tomatch {
				cur <- 0
				n <- tomatch byte_length
				match <- true
				while: { match && cur < n } do: {
					res <- mcall
					match <- res matched?
					if: match {
						//TODO: Use some kind of lightweight substring wrapper here
						tomatch <- tomatch from: (res matchlen)
						cur <- cur + (res matchlen)
					}
				}
				if: cur > 0 {
					#{
						matched? <- { true }
						matchlen <- { cur }
					}
				} else: {
					#{
						matched? <- { false }
					}
				}
			}
		} else: {
			print: "#error Invalid zeroPlus macro call: " . (mc message) . "\n"
		}
	}

	// Macro: build a matcher from the list literal `options` that yields the
	// result of the first alternative that matches. The alternatives are
	// folded from the right into nested ifmatch:else: calls, ending in a
	// result that answers matched? false.
	// Alternatives that _makeMatchCall rejects are reported with a "#error"
	// line and left out of the generated chain.
	matchOne <- macro: :options {
		calls <- (options value) map: :option {
			_makeMatchCall: option
		}
		// Innermost fallback, used once every alternative has failed
		failure <- quote: #{
			matched? <- { false }
		}
		chain <- calls foldr: failure with: :rest entry {
			if: (entry valid?) {
				attempt <- entry matchcall
				quote: (ifmatch: attempt else: { rest })
			} else: {
				print: "#error Invalid matchOne macro call: " . (entry message) . "\n"
				rest
			}
		}
		quote: :tomatch {
			chain
		}
	}


	// Matches a single letter a-z or A-Z.
	_alpha <- charClass: "a-zA-Z"
	// Matches a run of letters. zeroPlus reports no match when nothing was
	// consumed, so at least one letter is required.
	alpha <- zeroPlus: _alpha
	// Matches a run of letters and digits (at least one, as for alpha).
	alphaNum <- zeroPlus: (charClass: "a-zA-Z0-9")
	// Matches a run of spaces, tabs and /* ... */ comments (presumably
	// "horizontal whitespace"). A comment body is a run in which each step is
	// either one byte other than "*", or a "*" followed by one byte other
	// than "/".
	// NOTE(review): the body uses zeroPlus, which needs at least one
	// repetition, so an empty comment "/**/" looks like it will not match -
	// confirm.
	hws <- zeroPlus: (matchOne: [
		(charClass: " \t")
		"/*" . (zeroPlus: (matchOne: [(charClass: "^*") "*" . (charClass: "^/")])) . "*/"
	])


	// Demo entry point: runs the sample matchers on fixed inputs and prints,
	// for each, either what was matched or that the input did not match.
	main <- {
		cmatch <- alpha: "czx0123"
		zeromatch <- alpha: "01234"
		if: (cmatch matched?) {
			print: "czx0123 matched with length " . (cmatch matchlen) . "\n"
		} else: {
			print: "czx0123 didn't match\n"
		}
		// The labels below name the actual input "01234"; they previously
		// said "0123", which did not correspond to the string being tested.
		if: (zeromatch matched?) {
			print: "01234 matched with length " . (zeromatch matchlen) . "\n"
		} else: {
			print: "01234 didn't match\n"
		}
		zeromatchanum <- alphaNum: "01234"
		if: (zeromatchanum matched?) {
			print: "01234 matched with length " . (zeromatchanum matchlen) . "\n"
		} else: {
			print: "01234 didn't match\n"
		}
		// Show the text that remains once leading whitespace and comments
		// have been matched by hws
		stuff <- " \t/* blah blah blah * blah */ foo"
		hwsmatch <- hws: stuff
		if: (hwsmatch matched?) {
			print: "'" . (stuff from: (hwsmatch matchlen)) . "' found after hws\n"
		} else: {
			print: stuff . " did not match hws rule\n"
		}
	}
}