view modules/parser.tp @ 227:8c16ef123aee

Implement list literals in grammar
author Michael Pavone <pavone@retrodev.com>
date Sun, 29 Dec 2013 17:09:21 -0800
parents 6055f56d0e45
children decdf28a8517
line wrap: on
line source

#{
	_applyMatch <- :fun tomatch {
		fun: tomatch
	}
	_matchString <- :str tomatch {
		if: (tomatch isString?) {
			if: (tomatch length) < (str length) {
				#{
					matched? <- { false }
				}
			} else: {
				if: (tomatch length) > (str length) {
					tomatch <- tomatch from: 0 withLength: (str length)
				}
				if: str = tomatch {
					#{
						matched? <- { true }
						matchlen <- { str length }
						basicYield? <- { true }
						yield <- { str }
					}
				} else: {
					#{
						matched? <- { false }
					}
				}
			}
		} else: {
			#{
				matched? <- { false }
			}
		}
	}
	ifmatch:else <- :matchres :elseblock {
		if: (matchres matched?) {
			matchres
		} else: {
			elseblock:
		}
	}
	_makeMatchCall <- :matchexpr {
		if: (matchexpr nodeType) = "lambda" {
			#{
				valid? <- { true }
				matchcall <- quote: (_applyMatch: matchexpr tomatch)
			}
		} else: {
			if: (matchexpr nodeType) = "symbol" {
				#{
					valid? <- { true }
					matchcall <- quote: (matchexpr: tomatch)
				}
			} else: {
				if: (matchexpr nodeType) = "strlit" {
					#{
						valid? <- { true }
						matchcall <- quote: (_matchString: matchexpr tomatch)
					}
				} else: {
					if: (matchexpr nodeType) = "op" {
						if: (matchexpr opName) = "." {
							left <- (_makeMatchCall: (matchexpr left)) matchcall
							right <- (_makeMatchCall: (matchexpr right)) matchcall
							#{
								valid? <- { true }
								matchcall <- quote: (_applyMatch: :tomatch {
									lm <- left
									if: (lm matched?) {
										orig <- tomatch
										tomatch <- tomatch from: (lm matchlen)
										rm <- right
										if: (rm matched?) {
											total <- (rm matchlen) + (lm matchlen)
											#{
												matched? <- { true }
												matchlen <- { total }
												basicYield? <- { true }
												yield <- { orig from: 0 withLength: total }
											}
										} else: {
											rm
										}
									} else: {
										lm
									}
								} tomatch)
							}
						} else: {
							#{
								valid? <- { false }
								message <- "Unsupported operator " . (matchexpr opName)
							}
						}
					} else: {
						#{
							valid? <- { false }
							message <- "Unsupported AST node type " . (matchexpr nodeType)
						}
					}
				}
			}
		}
	}
	expandClass <- :chars {
		if: (chars length) > 0 {
			pos <- 0
			inverted <- false
			if: (chars byte: 0) = ("^" byte: 0) {
				pos <- 1
				inverted <- true
			}
			state_begin <- 0
			state_normal <- 1
			state_rangeend <- 2
			state <- state_begin
			out <- ""
			while: { pos < (chars byte_length)} do: {
				if: state = state_begin {
					out <- out . (chars from: pos withLength: 1)
					state <- state_normal
				} else: {
					if: state = state_normal {
						if: (chars byte: pos) = ("-" byte: 0) {
							state <- state_rangeend
						} else: {
							out <- out . (chars from: pos withLength: 1)
						}
					} else: {
						rangestart <- out byte: ((out byte_length) - 1)
						rangeend <- chars byte: pos
						if: rangeend < rangestart {
							tmp <- rangeend
							rangeend <- rangestart
							rangestart <- tmp
						}
						out <- out from: 0 withLength: ((out length) - 1)
						while: { rangestart <= rangeend } do: {
							out <- out . (rangestart asStringChar)
							rangestart <- rangestart + 1
						}
						state <- state_begin
					}
				}
				pos <- pos + 1
			}
			if: inverted {
				old <- out
				out <- ""
				//skip control characters for now
				cur <- 32
				while: { cur < 256 } do: {
					notfound <- true
					idx <- 0
					len <- (old length)
					while: { notfound && idx < len } do: {
						if: cur = (old byte: idx) {
							notfound <- false
						} else: {
							idx <- idx + 1
						}
					}
					if: notfound {
						out <- out . (cur asStringChar)
					}
					cur <- cur + 1
				}
			}
			out
		} else: {
			""
		}
	}
	charClass <- macro: :rawchars {
		eval: rawchars :chars {
			orig <- chars
			chars <- expandClass: chars
			//TODO: Use a more sophisticated approach for large classes
			quote: :tomatch {
				if: (tomatch isString?) {
					check <- 0

					nomatch <- true
					while: { nomatch && check < (chars byte_length) } do: {
						if: (tomatch byte: 0) = (chars byte: check) {
							nomatch <- false
						}
						check <- check + 1
					}
					if: nomatch {
						#{
							matched? <- { false }
						}
					} else: {
						#{
							matched? <- { true }
							matchlen <- { 1 }
							basicYield? <- { true }
							yield <- { tomatch from: 0 withLength: 1 }
						}
					}
				} else: {
					#{
						matched? <- { false }
					}
				}
			}
		} else: {
			print: "#error Argument to charClass macro must be a compile-time constant\n"
		}
	}

	_nPlus <- :matchexpr min {
		funexpr <- false
		valid <- false
		mc <- _makeMatchCall: matchexpr
		if: (mc valid?) {
			mcall <- mc matchcall
			quote: :tomatch {
				cur <- 0
				count <- 0
				n <- tomatch byte_length
				orig <- tomatch
				_match <- true
				allBasic? <- true
				yieldvals <- []
				while: { _match && cur < n } do: {
					res <- mcall
					_match <- res matched?
					if: _match {
						count <- count + 1
						//TODO: Use some kind of lightweight substring wrapper here
						tomatch <- tomatch from: (res matchlen)
						if: allBasic? {
							ifnot: (res basicYield?) {
								allBasic? <- false
								if: cur > 0 {
									yieldvals <- (orig from: 0 withLength: cur) | yieldvals
								}
								yieldvals <- (res yield) | yieldvals
							}
						} else: {
							yieldvals <- (res yield) | yieldvals
						}
						allBasic? <- allBasic? && (res basicYield?)
						cur <- cur + (res matchlen)
					}
				}
				if: count >= min {
					if: allBasic? {
						#{
							matched? <- { true }
							matchlen <- { cur }
							basicYield? <- { true }
							yield <- { orig from: 0 withLength: cur }
						}
					} else: {
						yieldvals <- yieldvals reverse
						#{
							matched? <- { true }
							matchlen <- { cur }
							basicYield? <- { false }
							yield <- { yieldvals }
						}
					}
				} else: {
					#{
						matched? <- { false }
					}
				}
			}
		} else: {
			print: "#error Invalid nPlus macro call: " . (mc message) . "\n"
		}
	}

	zeroPlus <- macro: :matchexpr {
		_nPlus: matchexpr 0
	}

	onePlus <- macro: :matchexpr {
		_nPlus: matchexpr 1
	}

	matchOne <- macro: :options {
		options <- (options value) map: :option {
			_makeMatchCall: option
		}
		body <- options foldr: (quote: #{
			matched? <- { false }
		}) with: :acc el {
			if: (el valid?) {
				mcall <- el matchcall
				quote: (ifmatch: mcall else: { acc })
			} else: {
				print: "#error Invalid matchOne macro call: " . (el message) . "\n"
				acc
			}
		}
		quote: :tomatch {
			body
		}
	}

	match <- macro: :matchexpr {
		mc <- _makeMatchCall: matchexpr
		if: (mc valid?) {
			mcall <- mc matchcall
			quote: :tomatch {
				mcall
			}
		} else: {
			print: "#error Invalid macth macro call: " . (mc message) . "\n"
		}
	}

	match:yield <- macro: :matchexpr :ylambda {
		mc <- _makeMatchCall: matchexpr
		if: (mc valid?) {
			mcall <- mc matchcall
			quote: :tomatch {
				res <- mcall
				if: (res matched?) {
					#{
						matched? <- { true }
						matchlen <- { res matchlen }
						basicYield? <- { false }
						yield <- ylambda
					}
				} else: {
					res
				}
			}
		} else: {
			print: "#error Invalid macth:yield macro call: " . (mc message) . "\n"
		}
	}

	match:where:yield <- macro: :matchexpr :whereclause :ylambda {
		syms <- []
		withwhere <- (whereclause expressions) fold: (quote: :tomatch {}) with: :acc el {

			if: (el nodeType) = "assignment" {
				valassign <- quote: (val <- false)
				valsym <- (valassign) symbol
				valsym <- valsym name!: (valsym name) . ((el symbol) name)
				valassign <- valassign symbol!: valsym
				acc addExpression: valassign

				matchassign <- quote: (hasmatch <- false)
				matchsym <- (matchassign) symbol
				matchsym <- matchsym name!: (matchsym name) . ((el symbol) name)
				matchassign <- matchassign symbol!: matchsym
				acc addExpression: matchassign

				mc <- _makeMatchCall: (el expression)

				if: (mc valid?) {
					mcall <- mc matchcall
					matchfun <- quote: :tomatch {
						if: matchsym {
							if: valsym = tomatch {
								#{
									matched? <- { true }
									matchlen <- { valsym length }
									basicYield? <- { true } //TODO: Check if this is correct
									yield <- { valsym }
								}
							} else: {
								#{
									matched? <- { false }
								}
							}
						} else: {
							mr <- mcall
							if: (mr matched?) {
								matchsym <- true
								valsym <- (mr yield)
							}
							mr
						}
					}
					acc <- acc addExpression: (el expression!: matchfun)
					syms <- list node: #{
						orig <- el symbol
						matchval <- valsym
					} withTail: syms
					acc
				} else: {
					print: "#error " . ((el symbol) name) . " does not have a valid match expression: " . (mc message) . "\n"
				}

			} else: {
				print: "#error Nodes of type " . (el nodeType) . " are not allowed in match where clauses\n"
				acc
			}
		}
		mcMain <- _makeMatchCall: matchexpr
		if: (mcMain valid?) {
			mcall <- mcMain matchcall
			withwhere addExpression: (quote: (matchres <- mcall))
			successLambda <- quote: {
				//Extra assignments will be added here
				mlen <- matchres matchlen
				#{
					matched? <- { true }
					matchlen <- { mlen }
					basicYield? <- { false }
					yield <- ylambda
				}
			}
			sucexp <- syms fold: (successLambda expressions) with: :acc el {
				lsym <- el orig
				rsym <- el matchval
				(quote: (lsym <- rsym)) | acc
			}
			successLambda <- successLambda expressions!: sucexp
			withwhere addExpression: (quote: (if: (matchres matched?) successLambda else: {
				matchres
			}))
			withwhere
		} else: {
			print: "#error Error in main match expression of match:where:yield: " . (mcMain message) . "\n"
		}
	}

	binaryOps:withHigherPrec <- macro: :oplist :higher {
		quote: (match: Left . Pieces where: {
			Left <- match: higher
			Pieces <- zeroPlus: (match: hws . Op . Right where: {
			Op <- matchOne: oplist
			Right <- match: higher
			} yield: {
				#{
					op <- Op
					right <- Right
				}
			})
		} yield: {
			_processOpPieces: Left Pieces
		})
	}

	opexpr <- binaryOps: ["&&" "||"] withHigherPrec: compare
	compare <- binaryOps: ["<=" ">=" "<" ">" "=" "!="] withHigherPrec: maybecons
	maybecons <- matchOne: [
		consop
		addsub
	]
	consop <- match: Left . hws . "|" . Right where: {
		Left <- match: addsub
		Right <- match: maybecons
	} yield: {
		#{
			left <- Left
			op <- "|"
			right <- Right
			string <- {
				(string: left) . " " . op . " " . right
			}
		}
	}
	addsub <- binaryOps: ["+" "-" "."] withHigherPrec: muldiv
	muldiv <- binaryOps: ["*" "/" "%"] withHigherPrec: primlitsym

	//TODO: Implement operator expressions


	_alpha <- charClass: "a-zA-Z"
	alpha <- zeroPlus: _alpha
	alphaNum <- zeroPlus: (charClass: "a-zA-Z0-9")

	blockComment <- match: "/*" . (zeroPlus: (matchOne: [(charClass: "^*") "*" . (charClass: "^/")])) . "*/" yield: { false }

	hws <- zeroPlus: (matchOne: [
		(charClass: " \t")
		blockComment
	])

	ws <- zeroPlus: (matchOne: [
		(charClass: " \n\t\r")
		"//" . (zeroPlus: (charClass: "^\n")) . "\n"
		blockComment
	])

	escape <- matchOne: [
		(match: "\\n" yield: {"\n"})
		(match: "\\r" yield: {"\n"})
		(match: "\\t" yield: {"\n"})
		(match: "\\\\" yield: {"\\"})
		(match: "\\\"" yield: {"\""})
	]

	string <- match: "\"" . Chars . "\"" where: {
		Chars  <- zeroPlus: (matchOne: [
			(charClass: "^\"\\")
			escape
		])
	} yield: {
		Chars join: ""
	}

	bdigit <- matchOne: [
		(match: "0" yield: {0i64})
		(match: "1" yield: {1i64})
	]

	digit <- matchOne: [
		bdigit
		(match: "2" yield: {2i64})
		(match: "3" yield: {3i64})
		(match: "4" yield: {4i64})
		(match: "5" yield: {5i64})
		(match: "6" yield: {6i64})
		(match: "7" yield: {7i64})
		(match: "8" yield: {8i64})
		(match: "9" yield: {9i64})
	]

	hdigit <- matchOne: [
		digit
		(match: (charClass: "aA") yield: {10i64})
		(match: (charClass: "bB") yield: {11i64})
		(match: (charClass: "cC") yield: {12i64})
		(match: (charClass: "dD") yield: {13i64})
		(match: (charClass: "eE") yield: {14i64})
		(match: (charClass: "fF") yield: {15i64})
	]

	binary <- match: "0b" . Digits . Suffix where: {
		Digits <- onePlus: bdigit
		Suffix <- matchOne: [
			(charClass: "ui") . (matchOne: ["8" "16" "32" "64"])
			""
		]
	} yield: {
		num <- Digits fold: 0 with: :acc el {
			acc * 2i64 + el
		}
		signed <- true
		litbits <- 32
		if: (Suffix length) > 0 {
			if: (Suffix from: 0 withLength: 1) = "u" {
				signed <- false
			}
			litbits <- (Suffix from: 1) int32
		}
		#{
			litval <- num
			signed? <- signed
			bits <- litbits
			string <- {
				str <- "0b"
				i <- bits - 1
				printzero <- false
				while: { i >= 0 } do: {
					str <- str . (if: (lshift: 1 by: i) and num > 0 {
						printzero <- true
						"1"
					} else: {
						if: printzero {"0"} else: {""}
					})
					i <- i - 1
				}
				if: (not: signed?) || bits != 32 {
					str <- str . (if: signed { "i" } else: { "u" }) . bits
				}
				str
			}
		}
	}

	decimal <- match: Sign . Digits . Suffix where: {
		Sign <- matchOne: ["-" ""]
		Digits <- onePlus: digit
		Suffix <- matchOne: [
			(charClass: "ui") . (matchOne: ["8" "16" "32" "64"])
			""
		]
	} yield: {
		num <- Digits fold: 0 with: :acc el {
			acc * 10i64 + el
		}
		if: Sign = "-" {
			num <- 0i64 - num
		}
		signed <- true
		litbits <- 32
		if: (Suffix length) > 0 {
			if: (Suffix from: 0 withLength: 1) = "u" {
				signed <- false
			}
			print: (Suffix from: 1) . "\n"
			litbits <- (Suffix from: 1) int32
		}
		#{
			litval <- num
			signed? <- signed
			bits <- litbits
			string <- {
				str <- string: litval
				if: (not: signed?) || bits != 32 {
					str <- str . (if: signed? {"i"} else: {"u"}) . bits
				}
				str
			}
		}
	}

	hexlit <- match: "0x" . Digits . Suffix where: {
		Digits <- onePlus: hdigit
		Suffix <- matchOne: [
			(charClass: "ui") . (matchOne: ["8" "16" "32" "64"])
			""
		]
	} yield: {
		num <- Digits fold: 0 with: :acc el {
			acc * 16i64 + el
		}
		signed <- true
		litbits <- 32
		if: (Suffix length) > 0 {
			if: (Suffix from: 0 withLength: 1) = "u" {
				signed <- false
			}
			litbits <- (Suffix from: 1) int32
		}
		#{
			litval <- num
			signed? <- signed
			bits <- litbits
			string <- {
				str <- "0x" . (hex: litval)
				if: (not: signed?) || bits != 32 {
					str <- str . (if: signed? {"i"} else: {"u"}) . bits
				}
				str
			}
		}
	}

	symexpr <- match: Name where: {
		Name <- match: (onePlus: (charClass: "a-zA-Z_@!?")) . (zeroPlus: ((matchOne: [":" ""]) . (charClass: "a-zA-Z_@!?0-9")))
	} yield: {
		#{
			name <- Name
			string <- {
				name
			}
		}
	}

	namepart <- match: hws . Symbol . ":" where: {
		Symbol <- match: symexpr
	} yield: {
		#{
			isNamePart? <- { true }
			val <- Symbol name
		}
	}

	argpart <- matchOne: [
		match: namepart
		match: Arg where: {
			Arg <- opexpr
		} yield: {
			#{
				isNamePart? <- { false }
				val <- Arg
			}
		}
	]

	funcall <- match: hws . Initial . Parts where: {
		Initial <- match: namepart
		Parts <- onePlus: argpart
	} yield: {
		Initial | Parts foldr: #{
			name <- ""
			args <- []
		} with: :acc el {
			nextName <- acc name
			nextArgs <- acc args
			if: (el isNamePart?) {
				nextName <- if: ((acc name) length) > 0 { (el val) . ":" . (acc name) } else: { el val }
			} else: {
				nextArgs <- (el val) | nextArgs
			}

			#{
				name <- nextName
				args <- nextArgs
				string <- {
					str <- ""
					curArgs <- args
					nameParts <- name splitOn: ":"
					foreach: nameParts :part {
						str <- str . part . ":"
						if: (not: (curArgs empty?)) {
							str <- str . " " . (curArgs value)
							curArgs <- curArgs tail
						}
					}
					str
				}
			}
		}
	}

	unarymeth <- match: Receiver . hws . Method where: {
		Receiver <- match: opexpr
		Method <- match: symexpr
	} yield: {
		#{
			receiver <- Receiver
			name <- Method name
			args <- []
		}
	}

	methcall <- match: Receiver . hws . Rest where: {
		Receiver <- match: opexpr
		Rest <- match: funcall
	} yield: {
		#{
			receiver <- Receiver
			name <- Rest name
			args <- Rest args
			string <- {
				nameParts <- name splitOn: ":"
				curArgs <- args
				str <- (string: receiver) . " "
				foreach: nameParts :part {
					str <- str . part . ":"
					if: (not: (curArgs empty?)) {
						str <- str . " " . (curArgs value)
						curArgs <- curArgs tail
					}
				}
				str
			}
		}
	}
	_processOpPieces <- :Left Pieces {
		if: (Pieces length) > 0 {
			Pieces fold: Left with: :acc piece {
				#{
					left <- acc
					op <- piece op
					right <- piece right
					string <- {
						(string: left) . " " . op . " " . right
					}
				}
			}
		} else: {
			Left
		}
	}

	expr <- match: (hws . Expr . ws) where: {
		Expr <- matchOne: [
			funcall
			methcall
			unarymeth
			opexpr
		]
	} yield: {
		Expr
	}

	lexpr <- match: (hws . Expr . ws) where: {
		Expr <- matchOne: [
			funcall
			methcall
			opexpr
		]
	} yield: {
		Expr
	}

	assignment <- match: ws . Symbol . hws . "<-" . Expr where: {
		Symbol <- match: symexpr
		Expr <- match: expr
	} yield: {
		#{
			assign <- Expr
			to <- Symbol
			string <- {
				(string: to) . " <- " . assign
			}
		}
	}

	object <- match: "#{" . ws . Messages . "}" where: {
		Messages <- zeroPlus: assignment
	} yield: {
		#{
			messages <- Messages
			string <- {
				"#{\n\t". ((messages map: :el {
					string: el
				}) join: "\n\t") . "\n}"
			}
		}
	}

	listlit <- match: "[" . ws . Els . "]" where: {
		Els <- zeroPlus: lexpr
	} yield: {
		//Handle limitation of zeroPlus macro
		if: (Els length) = 0 {
			Els <- []
		}
		#{
			litval <- Els
			string <- {
				"[\n\t". ((litval map: :el {
					string: el
				}) join: "\n\t") . "\n]"
			}
		}
	}

	primlitsym <- match: hws . Lit where: {
		Lit <- matchOne: [
			hexlit
			binary
			decimal
			symexpr
			object
			listlit
		]
	} yield: {
		Lit
	}

	testmatchintlit <- :val matchfun {
		res <- matchfun: val
		if: (res matched?) {
			y <- res yield
			print: val . " matched with litval " . (y litval) . ", bits " . (y bits) . " and singned? " . (y signed?) . "\n"
		} else: {
			print: val . " did not match\n"
		}
	}

	main <- {
		cmatch <- alpha: "czx0123"
		zeromatch <- alpha: "01234"
		if: (cmatch matched?) {
			print: "czx0123 matched with length " . (cmatch matchlen) . "\n"
		} else: {
			print: "czx0123 didn't match\n"
		}
		if: (zeromatch matched?) {
			print: "0123 matched with length " . (zeromatch matchlen) . "\n"
		} else: {
			print: "0123 didn't match\n"
		}
		zeromatchanum <- alphaNum: "01234"
		if: (zeromatchanum matched?) {
			print: "01234 matched with length " . (zeromatchanum matchlen) . "\n"
		} else: {
			print: "01234 didn't match\n"
		}
		stuff <- " \t/* blah blah blah * blah */ foo"
		hwsmatch <- hws: stuff
		if: (hwsmatch matched?) {
			print: "'" . (stuff from: (hwsmatch matchlen)) . "' found after hws\n"
		} else: {
			print: stuff . " did not match hws rule\n"
		}
		tmatch <- digit: "3"
		if: (tmatch matched?) {
			print: "3 matched with yield " . (tmatch yield) . ", yield + 1 = " . ((tmatch yield) + 1) . "\n"
		} else: {
			print: "3 did not match\n"
		}

		testmatchintlit: "345" :s {decimal: s}
		testmatchintlit: "-567" :s {decimal: s}
		testmatchintlit: "123u16" :s {decimal: s}
		testmatchintlit: "0x20" :s {hexlit: s}
		testmatchintlit: "0x42u64" :s {hexlit: s}
		testmatchintlit: "0b10101" :s {binary: s}
		code <- "#{ foo <- 123 > 0x42 && 42 < 104\n bar <- 0xABC + 0b1010101\n baz <- 0b1010 * 5\n qux <- fo: 38 shizzle: bam\n quine <- 123 | [4 5 6 fiddle sticks]\n}"
		codem <- expr: code
		if: (codem matched?) {
			print: code . "\nmatched with yield:\n" . (codem yield) . "\n"
		} else: {
			print: code . "\ndid not match\n"
		}
	}
}