view modules/string.tp @ 243:5b830147c1cd

Use a lightweight substring object in a few places in the parser to improve performance for large files.
author Mike Pavone <pavone@retrodev.com>
date Sun, 05 Jan 2014 23:07:26 -0800
parents 38140b7dbe3d
children 32964a4e7a33
line wrap: on
line source
#{
	llProperty: len withType: uint32_t
	llProperty: bytes withType: uint32_t
	llProperty: data withType: (char ptr)

	llMessage: length withVars: {
		intret <- (obj_int32 ptr)
	} andCode: {
		intret <- make_object: (addr_of: obj_int32_meta) NULL 0
		intret num!: len
		intret
	}

	llMessage: byte_length withVars: {
		intret <- (obj_int32 ptr)
	} andCode: {
		intret <- make_object: (addr_of: obj_int32_meta) NULL 0
		intret num!: bytes
		intret
	}

	llMessage: EQ_ withVars: {
		argb <- (string ptr)
	} andCode: :argb {
		if: len = (argb len) && bytes = (argb bytes) && (not: (memcmp: data (argb data) bytes)) {
			true
		}
	}

	llMessage: compareSub withVars: {
		argb <- string ptr
		myoff <- obj_int32 ptr
		boff <- obj_int32 ptr
		clen <- obj_int32 ptr
		intret <- obj_int32 ptr
	} andCode: :argb myoff boff clen {
		intret <- make_object: (addr_of: obj_int32_meta) NULL 0
		intret num!: (memcmp: data + (myoff num) (argb data) + (boff num) (clen num))
		intret
	}

	llMessage: NEQ_ withVars: {
		argb <- (string ptr)
	} andCode: :argb {
		if: len != (argb len) || bytes != (argb bytes) || (memcmp: data (argb data) bytes) {
			true
		}
	}

	llMessage: print withVars: {} andCode: {
		fwrite: data 1 bytes stdout
		self
	}

	llMessage: string withVars: {} andCode: {
		self
	}

	llMessage: CAT_ withVars: {
		argbo <- (object ptr)
		argb <- (string ptr)
		out <- (string ptr)
	} andCode: :argbo {
		argb <- mcall: string 1 argbo
		out <- make_object: (addr_of: string_meta) NULL 0
		out bytes!: bytes + (argb bytes)
		out len!: len + (argb len)
		out data!: (GC_MALLOC_ATOMIC: (out bytes) + 1)
		memcpy: (out data) data bytes
		memcpy: (out data) + bytes (argb data) (argb bytes) + 1
		out
	}

	llMessage: byte withVars: {
		index <- (obj_int32 ptr)
		intret <- (obj_int32 ptr)
	} andCode: :index {
		intret <- make_object: (addr_of: obj_int32_meta) NULL 0
		intret num!: (if: (index num) < bytes { data get: (index num) } else: {0})
		intret
	}

	llMessage: int32 withVars: {
		intret <- (obj_int32 ptr)
	} andCode: {
		intret  <- make_object: (addr_of: obj_int32_meta) NULL 0
		intret num!: (atoi: data)
		intret
	}

	parseHex32 <- {
		num <- 0u32
		cur <- 0
		a <- uint32: ("a" byte: 0)
		A <- uint32: ("A" byte: 0)
		f <- uint32: ("f" byte: 0)
		F <- uint32: ("F" byte: 0)
		zero <- "0" byte: 0
		nine <- "9" byte: 0
		while: { cur < byte_length} do: {
			b <- uint32: (byte: cur)
			cur <- cur + 1
			if: b >= zero && b <= nine {
				num <- num * 16 + (b - zero)
			} else: {
				if: b >= a && b <= f {
					num <- num * 16 + (b - a) + 10u32
				} else: {
					if: b >= A && b <= F {
						num <- num * 16 + (b - A) + 10u32
					} else: {
						cur <- byte_length
					}
				}
			}
		}
		num
	}

	parseHex64 <- {
		num <- 0u64
		cur <- 0
		a <- uint64: ("a" byte: 0)
		A <- uint64: ("A" byte: 0)
		f <- uint64: ("f" byte: 0)
		F <- uint64: ("F" byte: 0)
		zero <- "0" byte: 0
		nine <- "9" byte: 0
		while: { cur < byte_length} do: {
			b <- uint64: (byte: cur)
			cur <- cur + 1
			if: b >= zero && b <= nine {
				num <- num * 16 + (b - zero)
			} else: {
				if: b >= a && b <= f {
					num <- num * 16 + (b - a) + 10u64
				} else: {
					if: b >= A && b <= F {
						num <- num * 16 + (b - A) + 10u64
					} else: {
						cur <- byte_length
					}
				}
			}
		}
		num
	}

	llMessage: hash withVars: {
		intret <- (obj_int32 ptr)
		i <- uint32_t
	} andCode: {
		intret <- make_object: (addr_of: obj_int32_meta) NULL 0
		intret num!: 0
		if: bytes {
			intret num!: (data get: 0) * 128
			i <- 0
			while: { i < bytes } do: {
				intret num!: (1000003 * (intret num)) xor (data get: i)
				i <- i + 1
			}
			intret num!: (intret num) xor bytes
		}
		intret
	}

	llMessage: find:startingAt:else withVars: {
		intret <- obj_int32 ptr
		oneedle <- object ptr
		startpos <- obj_int32 ptr
		ifNotFound <- object ptr
		sneedle <- string ptr
		i <- uint32_t
		notFound <- uint32_t
	} andCode: :oneedle :startpos :ifNotFound {
		sneedle <- mcall: string 1 oneedle
		i <- startpos num
		notFound <- 1
		while: { notFound && i + (sneedle bytes) <= bytes} do: {
			if: (memcmp: data + i (sneedle data) (sneedle bytes)) = 0 {
				notFound <- 0
			} else: {
				i <- i + 1
			}
		}
		if: notFound {
			ccall: ifNotFound 0
		} else: {
			intret <- make_object: (addr_of: obj_int32_meta) NULL 0
			intret num!: i
			intret
		}
	}

	find:else <- :toFind :orElse {
		find: toFind startingAt: 0 else: orElse
	}

	llMessage: from:withLength withVars: {
		from <- obj_int32 ptr
		tocopy <- obj_int32 ptr
		ret <- string ptr
		start <- int32_t
		clampedLen <- int32_t
	} andCode: :from :tocopy {
		start <- from num
		if: start < 0 {
			start <- bytes + start
		}
		if: start > bytes {
			start <- bytes
		}
		clampedLen <- tocopy num
		if: start + clampedLen > bytes {
			clampedLen <- bytes - start
		}
		ret <- make_object: (addr_of: string_meta) NULL 0
		ret data!: (GC_MALLOC_ATOMIC: clampedLen + 1)
		memcpy: (ret data) data + start clampedLen
		ret len!: clampedLen
		ret bytes!: clampedLen
		ret
	}

	from <- :start {
		from: start withLength: length
	}

	partitionOn <- :delim {
		pos <- find: delim else: { -1 }
		if: pos >= 0 {
			_before <- from: 0 withLength: pos
			_after <- from: (pos + (delim length))
			#{
				before <- _before
				after <- _after
			}
		} else: {
			_before <- self
			#{
				before <- _before
				after <- ""
			}
		}
	}

	splitOn <- :delim {
		pos <- 0
		pieces <- #[]
		while: {
			pos <- find: delim else: { -1 }
			pos >= 0
		} do: {
			pieces append: (from: 0 withLength: pos)
			self <- from: pos + (delim length)
		}
		pieces append: self
	}

	trim <- {
		l <- length
		start <- 0
		space <- " " byte: 0
		tab <- "\t" byte: 0
		nl <- "\n" byte: 0
		cr <- "\r" byte: 0

		while: {
			if: start < l {
				b <- byte: start
				b = space || b = tab || b = nl || b = cr
			}
		} do: {
			start <- start + 1
		}
		end <- l
		while: {
			if: end > 0 {
				b <- byte: end
				b = space || b = tab || b = nl || b = cr
			}
		} do: {
			end <- end + 1
		}
		from: start withLength: (end - start)
	}

	isInteger? <- { false }
	isString? <- { true }
	isBasicString? <- { true }
}