changeset 243:5b830147c1cd

Use a lightweight substring object in a few places in the parser to improve performance for large files.
author Mike Pavone <pavone@retrodev.com>
date Sun, 05 Jan 2014 23:07:26 -0800
parents 0e7982adc76b
children ae5188be523e
files modules/parser.tp modules/string.tp
diffstat 2 files changed, 75 insertions(+), 3 deletions(-) [+]
line wrap: on
line diff
--- a/modules/parser.tp	Sun Jan 05 20:56:25 2014 -0800
+++ b/modules/parser.tp	Sun Jan 05 23:07:26 2014 -0800
@@ -1,4 +1,63 @@
 {
+// Lightweight substring: a view of _len bytes of _base starting at byte _start.
+// Nothing is copied until an operation needs a real string (see string below).
+light:from:withLength <- :_base :_start :_len {
+	// A view of a view is re-anchored on the underlying basic string.
+	if: (not: (_base isBasicString?)) {
+		_start <- _start + (_base start)
+		_base <- _base base
+	}
+	_needsflat? <- true
+	_flat <- false
+	#{
+		//TODO: UTF-8 support
+		length <- { _len }
+		byte_length <- { _len }
+		// Materializes the view as a basic string once and caches the copy.
+		string <- {
+			if: _needsflat? {
+				_flat <- _base from: _start withLength: _len
+				_needsflat? <- false
+			}
+			_flat
+		}
+		// Returns a substring taken from _base; l is clamped to the view's end.
+		from:withLength <- :s :l {
+			if: (l + s) > _len {
+				l <- _len - s
+			}
+			_base from: (_start + s) withLength: l
+		}
+		from <- :s { from: s withLength: (_len - s) }
+		byte <- :index { _base byte: (index + _start) }
+		// Compares bytes in place via compareSub, without flattening either side.
+		= <- :other {
+			if: (other length) = _len {
+				ostart <- 0
+				if: (not: (other isBasicString?)) {
+					ostart <- other start
+					// Unwrap through the public accessor; _base is private to each view.
+					other <- other base
+				}
+				res <- _base compareSub: other _start ostart _len
+				res = 0
+			}
+		}
+		// The remaining string operations flatten first and delegate.
+		. <- :other { (string: self) . other }
+		int32 <- { (string: self) int32 }
+		splitOn <- :delim { (string: self) splitOn: delim }
+		isString? <- { true }
+		isBasicString? <- { false }
+		// Accessors that let another view re-anchor on, or compare against, this one.
+		base <- { _base }
+		start <- { _start }
+	}
+}
+
+// Convenience form of light:from:withLength for the tail of a string:
+// the view covers base from offset start through to the end of base.
+light:from <- :base :start { light: base from: start withLength: ((base length) - start) }
 _applyMatch <- :fun tomatch {
 		fun: tomatch
 	}
@@ -10,7 +69,7 @@
 			if: (tomatch length) > (str length) {
 				tomatch <- tomatch from: 0 withLength: (str length)
 			}
-			if: str = tomatch {
+			if: tomatch = str {
 				#{
 					if <- :self trueblock {
 						trueblock:
@@ -62,7 +121,7 @@
 								lm <- left
 								if: lm {
 									orig <- tomatch
-									tomatch <- tomatch from: (lm matchlen)
+									tomatch <- light: tomatch from: (lm matchlen)
 									rm <- right
 									if: rm {
 										total <- (rm matchlen) + (lm matchlen)
@@ -123,7 +182,7 @@
 				_match <- if: res {
 					count <- count + 1
 					//TODO: Use some kind of lightweight substring wrapper here
-					tomatch <- tomatch from: (res matchlen)
+					tomatch <- light: tomatch from: (res matchlen)
 					if: allBasic? {
 						ifnot: (res basicYield?) {
 							allBasic? <- false
--- a/modules/string.tp	Sun Jan 05 20:56:25 2014 -0800
+++ b/modules/string.tp	Sun Jan 05 23:07:26 2014 -0800
@@ -27,6 +27,18 @@
 		}
 	}
 
+	llMessage: compareSub withVars: { // memcmp of a slice of this string against a slice of argb
+		argb <- string ptr // the string whose bytes are compared with this one's
+		myoff <- obj_int32 ptr // offset added to this string's data pointer
+		boff <- obj_int32 ptr // offset added to argb's data pointer
+		clen <- obj_int32 ptr // number of bytes passed to memcmp
+		intret <- obj_int32 ptr // boxed int32 that carries the memcmp result back
+	} andCode: :argb myoff boff clen {
+		intret <- make_object: (addr_of: obj_int32_meta) NULL 0
+		intret num!: (memcmp: data + (myoff num) (argb data) + (boff num) (clen num)) // NOTE(review): offsets and clen are not range-checked here; confirm callers keep them inside both strings
+		intret
+	}
+
 	llMessage: NEQ_ withVars: {
 		argb <- (string ptr)
 	} andCode: :argb {
@@ -275,4 +287,5 @@
 
 	isInteger? <- { false }
 	isString? <- { true }
+	isBasicString? <- { true }
 }