Mercurial > repos > tabletprog
view modules/parser.tp @ 331:61f5b794d939
Breaking change: method call syntax now always uses the syntactic receiver as the actual receiver. This makes its behavior different from function call syntax, but solves some problems with methods being shadowed by local variables and the like.
author | Michael Pavone <pavone@retrodev.com> |
---|---|
date | Sat, 28 Mar 2015 14:21:04 -0700 |
parents | 9d93e65a34be |
children | 27477c8c2823 |
line wrap: on
line source
{ _matchid <- 0 getMatchId <- { id <- _matchid _matchid <- _matchid + 1 id } matchMemo <- { _posdata <- #[] _checkInitData <- :len { len <- len + 1 while: { (_posdata length) < len } do: { _posdata append: (dict hash) } } #{ memo:at:withId:length <- :val :at :id :len { _checkInitData: len (_posdata get: at) set: id val self } getMemo:at:else <- :id :at :else { if: (_posdata length) > at { (_posdata get: at) ifget: id :val { val } else: else } else: else } } } light:from:withLength <- :_base :_start :_len { _matchmemo <- matchMemo: if: (not: (_base isBasicString?)) { _start <- _start + (_base start) _matchmemo <- _base memoData _base <- _base base } _needsflat? <- true _flat <- false #{ //TODO: UTF-8 support length <- { _len } byte_length <- { _len } string <- { if: _needsflat? { _needsflat? <- false _flat <- _base from: _start withLength: _len } _flat } from:withLength <- :s :l { if: (l + s) > _len { l <- _len - s } _base from: (_start + s) withLength: l } from <- :s { from: s withLength: (_len - s) } byte <- :index { _base byte: (index + _start) } = <- :other { if: (other length) = _len { ostart <- 0 if: (not: (other isBasicString?)) { ostart <- other start other <- other _base } res <- _base compareSub: other _start ostart _len res = 0 } } . <- :other { (string: self) . other } int32 <- { (string: self) int32 } splitOn <- :delim { (string: self) splitOn: delim } isString? <- { true } isBasicString? <- { false } base <- { _base } start <- { _start } memoData <- { _matchmemo } memo:at:withId <- :val :at :id { _matchmemo memo: val at: (at + _start) withId: id length: (_base length) self } getMemo:at:else <- :id :at :else { _matchmemo getMemo: id at: (at + _start) else: else } } } light:from <- :base :start { light: base from: start withLength: (base length) - start } _applyMatch <- :fun tomatch { fun: tomatch } _matchString <- :str tomatch { if: (tomatch isString?) { if: (tomatch length) < (str length) { false } else: { if: (tomatch length) > (str length) { tomatch <- tomatch from: 0 withLength: (str length) } if: tomatch = str { #{ if <- :self trueblock { trueblock: } ifnot <- :self falseblock { self } if:else <- :self trueblock :elseblock { trueblock: } matchlen <- { str length } basicYield? <- { true } yield <- { str } } } else: { false } } } else: { false } } _makeMatchCall <- :matchexpr { if: (matchexpr nodeType) = "lambda" { #{ valid? <- { true } matchcall <- quote: (_applyMatch: matchexpr tomatch) } } else: { if: (matchexpr nodeType) = "symbol" { #{ valid? <- { true } matchcall <- quote: (matchexpr: tomatch) } } else: { if: (matchexpr nodeType) = "strlit" { #{ valid? <- { true } matchcall <- quote: (_matchString: matchexpr tomatch) } } else: { if: (matchexpr nodeType) = "op" { if: (matchexpr opName) = "." { left <- (_makeMatchCall: (matchexpr left)) matchcall right <- (_makeMatchCall: (matchexpr right)) matchcall #{ valid? <- { true } matchcall <- quote: (_applyMatch: :tomatch { lm <- left if: lm { orig <- tomatch tomatch <- light: tomatch from: (lm matchlen) rm <- right if: rm { total <- (rm matchlen) + (lm matchlen) #{ if <- :self trueblock { trueblock: } ifnot <- :self falseblock { self } if:else <- :self trueblock :elseblock { trueblock: } matchlen <- { total } basicYield? <- { true } yield <- { orig from: 0 withLength: total } } } else: { rm } } else: { lm } } tomatch) } } else: { #{ valid? <- { false } message <- "Unsupported operator " . (matchexpr opName) } } } else: { #{ valid? <- { false } message <- "Unsupported AST node type " . (matchexpr nodeType) } } } } } } _nPlus <- :matchexpr min { funexpr <- false valid <- false mc <- _makeMatchCall: matchexpr if: (mc valid?) { mcall <- mc matchcall quote: :tomatch { cur <- 0 count <- 0 n <- tomatch byte_length orig <- tomatch _match <- true allBasic? <- true yieldvals <- [] while: { _match && cur < n } do: { res <- mcall _match <- if: res { count <- count + 1 //TODO: Use some kind of lightweight substring wrapper here tomatch <- light: tomatch from: (res matchlen) if: allBasic? { ifnot: (res basicYield?) { allBasic? <- false if: cur > 0 { yieldvals <- (orig from: 0 withLength: cur) | yieldvals } yieldvals <- (res yield) | yieldvals } } else: { yieldvals <- (res yield) | yieldvals } allBasic? <- allBasic? && (res basicYield?) cur <- cur + (res matchlen) true } } if: count >= min { if: allBasic? { #{ if <- :self trueblock { trueblock: } ifnot <- :self falseblock { self } if:else <- :self trueblock :elseblock { trueblock: } matchlen <- { cur } basicYield? <- { true } yield <- { orig from: 0 withLength: cur } } } else: { yieldvals <- yieldvals reverse #{ if <- :self trueblock { trueblock: } ifnot <- :self falseblock { self } if:else <- :self trueblock :elseblock { trueblock: } matchlen <- { cur } basicYield? <- { false } yield <- { yieldvals } } } } else: { false } } } else: { print: "#error Invalid nPlus macro call: " . (mc message) . "\n" } } _expandClass <- :chars { if: (chars length) > 0 { pos <- 0 inverted <- false if: (chars byte: 0) = ("^" byte: 0) { pos <- 1 inverted <- true } state_begin <- 0 state_normal <- 1 state_rangeend <- 2 state <- state_begin out <- "" while: { pos < (chars byte_length)} do: { if: state = state_begin { out <- out . (chars from: pos withLength: 1) state <- state_normal } else: { if: state = state_normal { if: (chars byte: pos) = ("-" byte: 0) { state <- state_rangeend } else: { out <- out . (chars from: pos withLength: 1) } } else: { rangestart <- out byte: ((out byte_length) - 1) rangeend <- chars byte: pos if: rangeend < rangestart { tmp <- rangeend rangeend <- rangestart rangestart <- tmp } out <- out from: 0 withLength: ((out length) - 1) while: { rangestart <= rangeend } do: { out <- out . (rangestart asStringChar) rangestart <- rangestart + 1 } state <- state_begin } } pos <- pos + 1 } if: inverted { old <- out out <- "" cur <- 0 while: { cur < 256 } do: { notfound <- true idx <- 0 len <- (old length) while: { notfound && idx < len } do: { if: cur = (old byte: idx) { notfound <- false } else: { idx <- idx + 1 } } if: notfound { out <- out . (cur asStringChar) } cur <- cur + 1 } } out } else: { "" } } _charClass <- :chars { orig <- chars chars <- _expandClass: chars charmap <- "" char <- 0 while: { char < 256 } do: { mchar <- 0 found <- false while: { mchar < (chars byte_length)} do: { if: (chars byte: mchar) = char { found <- true mchar <- chars byte_length } mchar <- mchar + 1 } charmap <- charmap . (if: found { "t" } else: { "f" }) char <- char + 1 } t <- "t" byte: 0 quote: :tomatch { if: (tomatch isString?) { if: (charmap byte: (tomatch byte: 0)) = t { #{ if <- :self trueblock { trueblock: } ifnot <- :self falseblock { self } if:else <- :self trueblock :elseblock { trueblock: } matchlen <- { 1 } basicYield? <- { true } yield <- { tomatch from: 0 withLength: 1 } } } else: { false } } else: { false } } } #{ charClass <- macro: :rawchars { eval: rawchars :chars { _charClass: chars } else: { print: "#error Argument to charClass macro must be a compile-time constant\n" } } zeroPlus <- macro: :matchexpr { _nPlus: matchexpr 0 } onePlus <- macro: :matchexpr { _nPlus: matchexpr 1 } matchOne <- macro: :options { myid <- getMatchId: options <- (options value) map: :option { _makeMatchCall: option } body <- options foldr: (quote: false) with: :acc el { if: (el valid?) { mcall <- el matchcall quote: (ifnot: mcall { acc }) } else: { print: "#error Invalid matchOne macro call: " . (el message) . "\n" acc } } quote: :tomatch { tomatch <- light: tomatch from: 0 tomatch getMemo: myid at: 0 else: { ret <- body tomatch memo: ret at: 0 withId: myid ret } } } match <- macro: :matchexpr { mc <- _makeMatchCall: matchexpr if: (mc valid?) { mcall <- mc matchcall quote: :tomatch { mcall } } else: { print: "#error Invalid macth macro call: " . (mc message) . "\n" } } match:yield <- macro: :matchexpr :ylambda { mc <- _makeMatchCall: matchexpr if: (mc valid?) { mcall <- mc matchcall quote: :tomatch { res <- mcall if: res { #{ if <- :self trueblock { trueblock: } ifnot <- :self falseblock { self } if:else <- :self trueblock :elseblock { trueblock: } matchlen <- { res matchlen } basicYield? <- { false } yield <- ylambda } } else: { res } } } else: { print: "#error Invalid macth:yield macro call: " . (mc message) . "\n" } } match:where:yield <- macro: :matchexpr :whereclause :ylambda { myid <- getMatchId: syms <- [] withwhere <- (whereclause expressions) fold: (quote: {}) with: :acc el { if: (el nodeType) = "assignment" { valassign <- quote: (val <- false) valsym <- (valassign) symbol valsym <- valsym name!: (valsym name) . ((el symbol) name) valassign <- valassign symbol!: valsym acc addExpression: valassign matchassign <- quote: (hasmatch <- false) matchsym <- (matchassign) symbol matchsym <- matchsym name!: (matchsym name) . ((el symbol) name) matchassign <- matchassign symbol!: matchsym acc addExpression: matchassign mc <- _makeMatchCall: (el expression) if: (mc valid?) { mcall <- mc matchcall matchfun <- quote: :tomatch { if: matchsym { if: valsym = tomatch { #{ if <- :self trueblock { trueblock: } ifnot <- :self falseblock { self } if:else <- :self trueblock :elseblock { trueblock: } matchlen <- { valsym length } basicYield? <- { true } //TODO: Check if this is correct yield <- { valsym } } } else: { false } } else: { mr <- mcall if: mr { matchsym <- true valsym <- (mr yield) } mr } } acc <- acc addExpression: (el expression!: matchfun) syms <- list node: #{ orig <- el symbol matchval <- valsym } withTail: syms acc } else: { print: "#error " . ((el symbol) name) . " does not have a valid match expression: " . (mc message) . "\n" } } else: { print: "#error Nodes of type " . (el nodeType) . " are not allowed in match where clauses\n" acc } } mcMain <- _makeMatchCall: matchexpr if: (mcMain valid?) { mcall <- mcMain matchcall withwhere addExpression: (quote: (matchres <- mcall)) successLambda <- quote: { //Extra assignments will be added here mlen <- matchres matchlen #{ if <- :self trueblock { trueblock: } ifnot <- :self falseblock { self } if:else <- :self trueblock :elseblock { trueblock: } matchlen <- { mlen } basicYield? <- { false } yield <- ylambda } } sucexp <- syms fold: (successLambda expressions) with: :acc el { lsym <- el orig rsym <- el matchval (quote: (lsym <- rsym)) | acc } successLambda <- successLambda expressions!: sucexp withwhere addExpression: (quote: (ret <- if: matchres successLambda else: { matchres })) withwhere addExpression: (quote: (tomatch memo: ret at: 0 withId: myid)) withwhere addExpression: (quote: ret) quote: :tomatch { tomatch <- light: tomatch from: 0 tomatch getMemo: myid at: 0 else: withwhere } } else: { print: "#error Error in main match expression of match:where:yield: " . (mcMain message) . "\n" } } binaryOps:withHigherPrec <- macro: :oplist :higher { quote: (match: Left . Pieces where: { Left <- match: higher Pieces <- zeroPlus: (match: hws . Op . Right where: { Op <- matchOne: oplist Right <- match: higher } yield: { #{ op <- Op right <- Right } }) } yield: { _processOpPieces: Left Pieces }) } opexpr <- binaryOps: ["&&" "||"] withHigherPrec: compare compare <- binaryOps: ["<=" ">=" "<" ">" "=" "!="] withHigherPrec: maybecons maybecons <- matchOne: [ consop addsub ] consop <- match: Left . hws . "|" . Right where: { Left <- match: addsub Right <- match: maybecons } yield: { ast binaryOp: "|" withArgs: Left Right } addsub <- binaryOps: ["+" "-" "."] withHigherPrec: muldiv muldiv <- binaryOps: ["*" "/" "%"] withHigherPrec: primlitsym _alpha <- charClass: "a-zA-Z" alpha <- zeroPlus: _alpha alphaNum <- zeroPlus: (charClass: "a-zA-Z0-9") blockComment <- match: "/*" . (zeroPlus: (matchOne: [(charClass: "^*") "*" . (charClass: "^/")])) . "*/" yield: { false } hws <- zeroPlus: (matchOne: [ (charClass: " \t") blockComment ]) ws <- zeroPlus: (matchOne: [ (charClass: " \n\t\r") "//" . (zeroPlus: (charClass: "^\n")) . "\n" blockComment ]) escape <- matchOne: [ (match: "\\n" yield: {"\n"}) (match: "\\r" yield: {"\n"}) (match: "\\t" yield: {"\n"}) (match: "\\\\" yield: {"\\"}) (match: "\\\"" yield: {"\""}) ] string <- match: "\"" . Chars . "\"" where: { Chars <- zeroPlus: (matchOne: [ match: Reg where: { Reg <- charClass: "^\"\\" } yield: { Reg } escape ]) } yield: { if: (Chars length) = 0 { Chars <- [] } ast stringLit: (Chars join: "") } bdigit <- matchOne: [ (match: "0" yield: {0i64}) (match: "1" yield: {1i64}) ] digit <- matchOne: [ bdigit (match: "2" yield: {2i64}) (match: "3" yield: {3i64}) (match: "4" yield: {4i64}) (match: "5" yield: {5i64}) (match: "6" yield: {6i64}) (match: "7" yield: {7i64}) (match: "8" yield: {8i64}) (match: "9" yield: {9i64}) ] hdigit <- matchOne: [ digit (match: (charClass: "aA") yield: {10i64}) (match: (charClass: "bB") yield: {11i64}) (match: (charClass: "cC") yield: {12i64}) (match: (charClass: "dD") yield: {13i64}) (match: (charClass: "eE") yield: {14i64}) (match: (charClass: "fF") yield: {15i64}) ] binary <- match: "0b" . Digits . Suffix where: { Digits <- onePlus: bdigit Suffix <- matchOne: [ (charClass: "ui") . (matchOne: ["8" "16" "32" "64"]) "" ] } yield: { num <- Digits fold: 0 with: :acc el { acc * 2i64 + el } signed <- true litbits <- 32 if: (Suffix length) > 0 { if: (Suffix from: 0 withLength: 1) = "u" { signed <- false } litbits <- (Suffix from: 1) int32 } ast intLit: num withBits: litbits andBase: 2 signed?: signed } decimal <- match: Sign . Digits . Suffix where: { Sign <- matchOne: ["-" ""] Digits <- onePlus: digit Suffix <- matchOne: [ (charClass: "ui") . (matchOne: ["8" "16" "32" "64"]) "" ] } yield: { num <- Digits fold: 0 with: :acc el { acc * 10i64 + el } if: Sign = "-" { num <- 0i64 - num } signed <- true litbits <- 32 if: (Suffix length) > 0 { if: (Suffix from: 0 withLength: 1) = "u" { signed <- false } litbits <- (Suffix from: 1) int32 } ast intLit: num withBits: litbits andBase: 10 signed?: signed } hexlit <- match: "0x" . Digits . Suffix where: { Digits <- onePlus: hdigit Suffix <- matchOne: [ (charClass: "ui") . (matchOne: ["8" "16" "32" "64"]) "" ] } yield: { num <- Digits fold: 0 with: :acc el { acc * 16i64 + el } signed <- true litbits <- 32 if: (Suffix length) > 0 { if: (Suffix from: 0 withLength: 1) = "u" { signed <- false } litbits <- (Suffix from: 1) int32 } ast intLit: num withBits: litbits andBase: 16 signed?: signed } symexpr <- match: Name where: { Name <- match: (onePlus: (charClass: "a-zA-Z_@!?")) . (zeroPlus: ((matchOne: [":" ""]) . (charClass: "a-zA-Z_@!?0-9"))) } yield: { ast symbol: Name } namepart <- match: hws . Symbol . ":" where: { Symbol <- match: symexpr } yield: { #{ isNamePart? <- { true } val <- Symbol name } } argpart <- matchOne: [ match: namepart match: Arg where: { Arg <- opexpr } yield: { #{ isNamePart? <- { false } val <- Arg } } ] funcall <- match: hws . Initial . Parts where: { Initial <- match: namepart Parts <- zeroPlus: argpart } yield: { if: (Parts length) = 0 { Parts <- [] } combined <- Initial | Parts foldr: #{ name <- "" args <- [] } with: :acc el { nextName <- acc name nextArgs <- acc args if: (el isNamePart?) { nextName <- if: ((acc name) length) > 0 { (el val) . ":" . (acc name) } else: { el val } } else: { nextArgs <- (el val) | nextArgs } #{ name <- nextName args <- nextArgs } } ast funcall: (ast symbol: (combined name)) withArgs: (combined args) hasReceiver?: false } unarymeth <- match: Receiver . hws . Method where: { Receiver <- match: opexpr Method <- match: symexpr } yield: { ast funcall: Method withArgs: [Receiver] hasReceiver?: true } methcall <- match: Receiver . hws . Rest where: { Receiver <- match: opexpr Rest <- match: funcall } yield: { ast funcall: (Rest tocall) withArgs: Receiver | (Rest args) hasReceiver?: true } _processOpPieces <- :Left Pieces { if: (Pieces length) > 0 { Pieces fold: Left with: :acc piece { ast binaryOp: (piece op) withArgs: acc (piece right) } } else: { Left } } expr <- match: (hws . Expr . ws) where: { Expr <- matchOne: [ funcall methcall unarymeth assignment opexpr ] } yield: { Expr } lexpr <- match: (hws . Expr . ws) where: { Expr <- matchOne: [ funcall methcall opexpr ] } yield: { Expr } opsym <- match: Name where: { Name <- matchOne: ["&&" "||" "<=" ">=" "<" ">" "=" "!=" "=" "-" "." "*" "/" "%" "|"] } yield: { ast symbol: Name } assignment <- match: ws . Symbol . hws . "<-" . Expr where: { Symbol <- matchOne: [ symexpr opsym ] Expr <- match: expr } yield: { ast assign: Expr to: Symbol } object <- match: "#{" . ws . Messages . ws . "}" where: { Messages <- zeroPlus: (match: ws . El where: { El <- matchOne: [ assignment funcall ] } yield: { El }) } yield: { if: (Messages length) = 0 { Messages <- [] } ast object: Messages } listlit <- match: "[" . ws . Els . "]" where: { Els <- zeroPlus: lexpr } yield: { //Handle limitation of zeroPlus macro if: (Els length) = 0 { Els <- [] } ast seqLit: Els array?: false } arraylit <- match: "#[" . ws . Els . "]" where: { Els <- zeroPlus: lexpr } yield: { //Handle limitation of zeroPlus macro if: (Els length) = 0 { Els <- [] } ast seqLit: Els array?: true } argname <- match: hws . Pre . Initial . Rest where: { Pre <- matchOne: [":" ""] Initial <- onePlus: (charClass: "a-zA-Z_!?@") Rest <- zeroPlus: (charClass: "a-zA-Z_!?@0-9") } yield: { Pre . Initial . Rest } lambda <- match: hws . Arglist . hws . "{" . ws . Exprs . "}" where: { Arglist <- matchOne: [ match: ":" . First . Rest where: { First <- match: symexpr Rest <- zeroPlus: argname } yield: { if: (Rest length) = 0 { Rest <- [] } ":" . (First name) | Rest } match: "" yield: { [] } ] Exprs <- zeroPlus: expr } yield: { if: (Exprs length) = 0 { Exprs <- [] } ast lambda: Exprs withArgs: Arglist } parenexp <- match: "(" . ws . Expr . ws . ")" where: { Expr <- match: expr } yield: { Expr } primlitsym <- match: hws . Lit where: { Lit <- matchOne: [ hexlit binary decimal symexpr lambda object listlit arraylit string parenexp ] } yield: { Lit } top <- matchOne: [ object lambda ] testmatchintlit <- :tomatch matchfun { res <- matchfun: tomatch if: res { y <- res yield print: tomatch . " matched with litval " . (y val) . ", bits " . (y bits) . " and singned? " . (y signed?) . "\n" } else: { print: tomatch . " did not match\n" } } main <- :args { cmatch <- alpha: "czx0123" zeromatch <- alpha: "01234" if: cmatch { print: "czx0123 matched with length " . (cmatch matchlen) . "\n" } else: { print: "czx0123 didn't match\n" } if: zeromatch { print: "0123 matched with length " . (zeromatch matchlen) . "\n" } else: { print: "0123 didn't match\n" } zeromatchanum <- alphaNum: "01234" if: zeromatchanum { print: "01234 matched with length " . (zeromatchanum matchlen) . "\n" } else: { print: "01234 didn't match\n" } stuff <- " \t/* blah blah blah * blah */ foo" hwsmatch <- hws: stuff if: hwsmatch { print: "'" . (stuff from: (hwsmatch matchlen)) . "' found after hws\n" } else: { print: stuff . " did not match hws rule\n" } tmatch <- digit: "3" if: tmatch { print: "3 matched with yield " . (tmatch yield) . ", yield + 1 = " . ((tmatch yield) + 1) . "\n" } else: { print: "3 did not match\n" } testmatchintlit: "345" :s {decimal: s} testmatchintlit: "-567" :s {decimal: s} testmatchintlit: "123u16" :s {decimal: s} testmatchintlit: "0x20" :s {hexlit: s} testmatchintlit: "0x42u64" :s {hexlit: s} testmatchintlit: "0b10101" :s {binary: s} code <- "#{ foo <- 123 > 0x42 && 42 < 104\n bar <- 0xABC + 0b1010101\n baz <- 0b1010 * 5\n qux <- fo: 38 shizzle: bam\n quine <- 123 | [4 5 6 fiddle sticks]\n quizzle <- #[receiver meth: arg]\n blah <- :arg arg2 :arg3 { arg + arg2 + arg3 }}" if: (args length) > 1 { file <- os open: (args get: 1) (os O_RDONLY) code <- "" chunksize <- 1024 readsize <- chunksize while: { readsize = chunksize} do: { seg <- os read: file chunksize code <- code . seg readsize <- seg byte_length } } codem <- top: code if: codem { print: code . "\nmatched with yield:\n" . (codem yield) . "\n" } else: { print: code . "\ndid not match\n" } } } }