annotate modules/string.tp @ 243:5b830147c1cd

Use a lightweight substring object in a few places in the parser to improve performance for large files.
author Mike Pavone <pavone@retrodev.com>
date Sun, 05 Jan 2014 23:07:26 -0800
parents 38140b7dbe3d
children 32964a4e7a33
rev   line source
pavone@88 1 #{
pavone@88 2 llProperty: len withType: uint32_t
pavone@88 3 llProperty: bytes withType: uint32_t
pavone@88 4 llProperty: data withType: (char ptr)
pavone@147 5
// length: boxes the value of the `len` field in a freshly allocated int32
// object and returns it. (`len` is tracked separately from `bytes`, which
// byte_length reports.)
llMessage: length withVars: {
	result <- (obj_int32 ptr)
} andCode: {
	result <- make_object: (addr_of: obj_int32_meta) NULL 0
	result num!: len
	result
}
pavone@147 13
// byte_length: boxes the value of the `bytes` field (the number of bytes
// other messages in this object read from `data`) in a freshly allocated
// int32 object and returns it.
llMessage: byte_length withVars: {
	result <- (obj_int32 ptr)
} andCode: {
	result <- make_object: (addr_of: obj_int32_meta) NULL 0
	result num!: bytes
	result
}
pavone@147 21
// EQ_: yields true when argb has the same `len`, the same `bytes` and
// byte-for-byte identical `data`.
// The size checks come first so that memcmp only runs over buffers that are
// known to hold the same number of bytes.
// NOTE(review): argb is treated as a (string ptr) without any check here, and
// the value produced when the strings differ is not visible in this block.
llMessage: EQ_ withVars: {
	argb <- (string ptr)
} andCode: :argb {
	if: len = (argb len) && bytes = (argb bytes) && (memcmp: data (argb data) bytes) = 0 {
		true
	}
}
pavone@147 29
// compareSub: memcmp-style comparison of a slice of this string against a
// slice of argb.
//   argb  - string to compare against
//   myoff - byte offset into this string's data
//   boff  - byte offset into argb's data
//   clen  - number of bytes to compare
// Returns the raw memcmp result boxed as an int32.
// No bounds checking is done in this block: the caller must make sure that
// both offset + clen ranges lie inside the respective buffers.
llMessage: compareSub withVars: {
	argb <- string ptr
	myoff <- obj_int32 ptr
	boff <- obj_int32 ptr
	clen <- obj_int32 ptr
	result <- obj_int32 ptr
} andCode: :argb myoff boff clen {
	result <- make_object: (addr_of: obj_int32_meta) NULL 0
	result num!: (memcmp: data + (myoff num) (argb data) + (boff num) (clen num))
	result
}
pavone@243 41
// NEQ_: yields true when argb differs from this string in `len`, in `bytes`
// or in the contents of `data`.
// The size checks short-circuit first, so memcmp is only reached when both
// buffers hold the same number of bytes.
// NOTE(review): the value produced when the strings are equal is not visible
// in this block.
llMessage: NEQ_ withVars: {
	argb <- (string ptr)
} andCode: :argb {
	if: len != (argb len) || bytes != (argb bytes) || (memcmp: data (argb data) bytes) != 0 {
		true
	}
}
pavone@147 49
// print: writes the `bytes` bytes of `data` to stdout with fwrite and returns
// self. The result of fwrite is not checked.
llMessage: print withVars: {} andCode: {
	fwrite: data 1 bytes stdout
	self
}
pavone@147 54
// string: conversion to string; this object already is one, so it returns
// self. CAT_ and find:startingAt:else send this message to their argument
// before reading its fields.
llMessage: string withVars: {} andCode: {
	self
}
pavone@147 58
// CAT_: concatenation. Converts argbo with the `string` message and returns a
// new string holding this string's bytes followed by the argument's bytes.
//   argbo - any object that responds to `string`
// The result buffer is allocated with one extra byte and is always
// NUL-terminated.
llMessage: CAT_ withVars: {
	argbo <- (object ptr)
	argb <- (string ptr)
	out <- (string ptr)
} andCode: :argbo {
	argb <- mcall: string 1 argbo
	out <- make_object: (addr_of: string_meta) NULL 0
	out bytes!: bytes + (argb bytes)
	out len!: len + (argb len)
	out data!: (GC_MALLOC_ATOMIC: (out bytes) + 1)
	memcpy: (out data) data bytes
	// Copy exactly the argument's bytes and write the terminator ourselves
	// instead of copying bytes + 1 and trusting the argument to be
	// NUL-terminated (buffers built elsewhere may leave that byte unset).
	memcpy: (out data) + bytes (argb data) (argb bytes)
	(out data) set: (out bytes) 0
	out
}
pavone@147 73
// byte: returns the byte stored at `index` as a boxed int32, or 0 when the
// index is not below `bytes`.
//   index - byte offset into data
llMessage: byte withVars: {
	index <- (obj_int32 ptr)
	result <- (obj_int32 ptr)
} andCode: :index {
	result <- make_object: (addr_of: obj_int32_meta) NULL 0
	if: (index num) < bytes {
		result num!: (data get: (index num))
	} else: {
		result num!: 0
	}
	result
}
pavone@147 82
// int32: parses the string as a decimal integer with C atoi and returns the
// result as a boxed int32.
// atoi is handed `data` directly and does not see `bytes`, so this relies on
// the buffer being NUL-terminated.
llMessage: int32 withVars: {
	parsed <- (obj_int32 ptr)
} andCode: {
	parsed <- make_object: (addr_of: obj_int32_meta) NULL 0
	parsed num!: (atoi: data)
	parsed
}
pavone@147 90
// parseHex32: interprets the string as hexadecimal digits (0-9, a-f, A-F)
// and returns the accumulated value as a uint32. Parsing stops at the first
// byte that is not a hex digit; whatever was accumulated so far is returned
// (0 for an empty or non-hex string).
parseHex32 <- {
	acc <- 0u32
	idx <- 0
	zero <- "0" byte: 0
	nine <- "9" byte: 0
	lowerA <- uint32: ("a" byte: 0)
	lowerF <- uint32: ("f" byte: 0)
	upperA <- uint32: ("A" byte: 0)
	upperF <- uint32: ("F" byte: 0)
	while: { idx < byte_length } do: {
		ch <- uint32: (byte: idx)
		idx <- idx + 1
		if: ch >= zero && ch <= nine {
			acc <- acc * 16 + (ch - zero)
		} else: {
			if: ch >= lowerA && ch <= lowerF {
				acc <- acc * 16 + (ch - lowerA) + 10u32
			} else: {
				if: ch >= upperA && ch <= upperF {
					acc <- acc * 16 + (ch - upperA) + 10u32
				} else: {
					// Not a hex digit: jump to the end so the loop terminates.
					idx <- byte_length
				}
			}
		}
	}
	acc
}
pavone@158 119
// parseHex64: 64-bit counterpart of parseHex32. Interprets the string as
// hexadecimal digits (0-9, a-f, A-F) and returns the accumulated value as a
// uint64. Parsing stops at the first byte that is not a hex digit; whatever
// was accumulated so far is returned.
parseHex64 <- {
	acc <- 0u64
	idx <- 0
	zero <- "0" byte: 0
	nine <- "9" byte: 0
	lowerA <- uint64: ("a" byte: 0)
	lowerF <- uint64: ("f" byte: 0)
	upperA <- uint64: ("A" byte: 0)
	upperF <- uint64: ("F" byte: 0)
	while: { idx < byte_length } do: {
		ch <- uint64: (byte: idx)
		idx <- idx + 1
		if: ch >= zero && ch <= nine {
			acc <- acc * 16 + (ch - zero)
		} else: {
			if: ch >= lowerA && ch <= lowerF {
				acc <- acc * 16 + (ch - lowerA) + 10u64
			} else: {
				if: ch >= upperA && ch <= upperF {
					acc <- acc * 16 + (ch - upperA) + 10u64
				} else: {
					// Not a hex digit: jump to the end so the loop terminates.
					idx <- byte_length
				}
			}
		}
	}
	acc
}
pavone@158 148
// hash: computes a hash of the string contents and returns it as a boxed
// int32. An empty string hashes to 0. Otherwise the value is seeded with
// (first byte * 128), then for every byte (the first one included) it is
// multiplied by 1000003 and xor-ed with that byte, and finally xor-ed with
// the byte count.
llMessage: hash withVars: {
	hashval <- (obj_int32 ptr)
	pos <- uint32_t
} andCode: {
	hashval <- make_object: (addr_of: obj_int32_meta) NULL 0
	hashval num!: 0
	if: bytes {
		hashval num!: (data get: 0) * 128
		pos <- 0
		while: { pos < bytes } do: {
			hashval num!: (1000003 * (hashval num)) xor (data get: pos)
			pos <- pos + 1
		}
		hashval num!: (hashval num) xor bytes
	}
	hashval
}
pavone@147 166
// find:startingAt:else: searches for the first occurrence of a needle at or
// after a given byte offset.
//   oneedle    - object to search for; converted with the `string` message
//   startpos   - byte offset at which the search begins; a negative value is
//                treated as 0
//   ifNotFound - callable invoked with no arguments when there is no match;
//                its result becomes the result of this message
// Returns the byte offset of the match as a boxed int32.
llMessage: find:startingAt:else withVars: {
	intret <- obj_int32 ptr
	oneedle <- object ptr
	startpos <- obj_int32 ptr
	ifNotFound <- object ptr
	sneedle <- string ptr
	i <- uint32_t
	notFound <- uint32_t
} andCode: :oneedle :startpos :ifNotFound {
	sneedle <- mcall: string 1 oneedle
	// i is unsigned: storing a negative start directly would turn it into a
	// huge offset, so clamp it to the beginning of the string instead.
	if: (startpos num) < 0 {
		i <- 0
	} else: {
		i <- startpos num
	}
	notFound <- 1
	// The bound is written as i <= bytes - needleBytes (after checking that the
	// needle fits at all) because i + needleBytes can wrap around in unsigned
	// arithmetic and let memcmp read outside the buffer.
	while: { notFound && (sneedle bytes) <= bytes && i <= bytes - (sneedle bytes) } do: {
		if: (memcmp: data + i (sneedle data) (sneedle bytes)) = 0 {
			notFound <- 0
		} else: {
			i <- i + 1
		}
	}
	if: notFound {
		ccall: ifNotFound 0
	} else: {
		intret <- make_object: (addr_of: obj_int32_meta) NULL 0
		intret num!: i
		intret
	}
}
pavone@150 194
// find:else: convenience form of find:startingAt:else that searches from
// byte offset 0. Returns the offset of the first match, or the result of
// calling orElse when toFind does not occur.
find:else <- :toFind :orElse {
	find: toFind startingAt: 0 else: orElse
}
pavone@154 198
// from:withLength: returns a new string holding a copy of part of this one.
//   from   - starting byte offset; a negative value counts back from the end.
//            An offset that is still out of range afterwards is clamped to
//            the end of the string, which yields an empty result.
//   tocopy - number of bytes to copy; clamped to what is available after the
//            start offset. A negative value selects everything up to the end.
// The returned buffer is always NUL-terminated.
llMessage: from:withLength withVars: {
	from <- obj_int32 ptr
	tocopy <- obj_int32 ptr
	ret <- string ptr
	start <- int32_t
	clampedLen <- int32_t
} andCode: :from :tocopy {
	start <- from num
	if: start < 0 {
		start <- bytes + start
	}
	// Spelled out explicitly rather than relying on the signed/unsigned
	// comparison to catch a start that is still negative.
	if: start < 0 || start > bytes {
		start <- bytes
	}
	clampedLen <- tocopy num
	// Compare against the remaining byte count instead of computing
	// start + clampedLen, which can overflow. A negative length used to reach
	// GC_MALLOC_ATOMIC/memcpy as a huge size whenever start + length was
	// still >= 0; it now consistently means "up to the end".
	if: clampedLen < 0 || clampedLen > bytes - start {
		clampedLen <- bytes - start
	}
	ret <- make_object: (addr_of: string_meta) NULL 0
	ret data!: (GC_MALLOC_ATOMIC: clampedLen + 1)
	memcpy: (ret data) data + start clampedLen
	// GC_MALLOC_ATOMIC does not clear the allocation, so the terminator has to
	// be written explicitly for consumers that expect a C string.
	(ret data) set: clampedLen 0
	ret len!: clampedLen
	ret bytes!: clampedLen
	ret
}
pavone@151 224
// from: returns a copy of this string from byte offset `start` to the end.
// Negative offsets count back from the end (see from:withLength).
from <- :start {
	// from:withLength works in bytes and clamps the count to what is
	// available, so pass the byte count; `length` reports the `len` field and
	// would cut the tail short whenever `bytes` is larger than `len`.
	from: start withLength: byte_length
}
pavone@151 228
// partitionOn: splits the string around the first occurrence of delim.
// Returns an object with two fields:
//   before - the text preceding the delimiter (self when delim is absent)
//   after  - the text following the delimiter ("" when delim is absent)
partitionOn <- :delim {
	pos <- find: delim else: { -1 }
	if: pos >= 0 {
		_before <- from: 0 withLength: pos
		// pos is a byte offset and find matched the bytes of `delim string`,
		// so skip exactly that many bytes. from:withLength clamps the count,
		// so passing the full byte length copies the rest of the string.
		_after <- from: (pos + ((delim string) byte_length)) withLength: byte_length
		#{
			before <- _before
			after <- _after
		}
	} else: {
		_before <- self
		#{
			before <- _before
			after <- ""
		}
	}
}
pavone@152 246
// splitOn: splits the string on every occurrence of delim and returns the
// pieces in order (the result of the final append to the list). A string
// that does not contain delim yields a single piece with the whole content.
// An empty delimiter is never treated as a match, so it also yields a single
// piece; previously it matched at offset 0 forever and the loop never ended.
splitOn <- :delim {
	pieces <- #[]
	delimBytes <- (delim string) byte_length
	start <- 0
	pos <- 0
	if: delimBytes > 0 {
		// Scan forward with find:startingAt:else instead of re-copying the
		// remaining tail after every match.
		while: {
			pos <- find: delim startingAt: start else: { -1 }
			pos >= 0
		} do: {
			pieces append: (from: start withLength: pos - start)
			start <- pos + delimBytes
		}
	}
	// Remainder after the last delimiter; from:withLength clamps the count.
	pieces append: (from: start withLength: byte_length)
}
pavone@152 259
// trim: returns a copy of the string with leading and trailing whitespace
// (space, tab, newline, carriage return) removed. A string consisting only
// of whitespace yields an empty string.
trim <- {
	// `byte:` and from:withLength index by byte, so work with the byte count.
	total <- byte_length
	start <- 0
	space <- " " byte: 0
	tab <- "\t" byte: 0
	nl <- "\n" byte: 0
	cr <- "\r" byte: 0

	// Advance start past leading whitespace.
	while: {
		if: start < total {
			b <- byte: start
			b = space || b = tab || b = nl || b = cr
		}
	} do: {
		start <- start + 1
	}
	// end is exclusive: look at the byte just before it and move backwards.
	// Stopping at start keeps end - start from going negative when the whole
	// string is whitespace.
	end <- total
	while: {
		if: end > start {
			b <- byte: (end - 1)
			b = space || b = tab || b = nl || b = cr
		}
	} do: {
		end <- end - 1
	}
	from: start withLength: (end - start)
}
pavone@157 287
// Type predicates. This object answers false to isInteger? and true to
// isString? and isBasicString?.
// NOTE(review): isBasicString? presumably tells this string apart from the
// lightweight substring object mentioned in the changeset description; confirm
// against its callers.
isInteger? <- { false }
isString? <- { true }
isBasicString? <- { true }
pavone@88 291 }