# HG changeset patch
# User Michael Pavone
# Date 1406089901 25200
# Node ID a4c2b31acba7b0fb4176596e1d8c126331e39ec7
# Parent b01d7c1b4edd3be515f5d0a65ebe28bf7a0b81b2
Add utf8 method to integer types for converting a number into a utf8 string made up of the corresponding character code

diff -r b01d7c1b4edd -r a4c2b31acba7 cbackend.js
--- a/cbackend.js	Tue Jul 22 18:39:00 2014 -0700
+++ b/cbackend.js	Tue Jul 22 21:31:41 2014 -0700
@@ -760,6 +760,46 @@
 			'return &(str->header);'
 		]
 	});
+	// utf8: returns a string object whose data is the UTF-8 encoding of the
+	// receiver's value (narrowed to uint32_t) treated as a character code.
+	// Codes 0..0x10FFFF yield a one-character string of 1-4 bytes; anything
+	// larger yields an empty string. Surrogate codes are not rejected.
+	intObj.addMessage('utf8', {
+		vars: {str: 'string *', norm: 'uint32_t'},
+		lines: [
+			'str = (string *)make_object(&string_meta, NULL, 0);',
+			'str->len = 1;',
+			'norm = self->num;',
+			'if (norm < 0x80) {',
+			'	str->bytes = 1;',
+			'	str->data = GC_MALLOC(2);',
+			'	str->data[0] = norm;',
+			'} else if(norm < 0x800) {',
+			'	str->bytes = 2;',
+			'	str->data = GC_MALLOC(3);',
+			'	str->data[0] = 0xC0 | (norm >> 6);',
+			'	str->data[1] = 0x80 | (norm & 0x3F);',
+			'} else if(norm < 0x10000) {',
+			'	str->bytes = 3;',
+			'	str->data = GC_MALLOC(4);',
+			'	str->data[0] = 0xE0 | (norm >> 12);',
+			'	str->data[1] = 0x80 | ((norm >> 6) & 0x3F);',
+			'	str->data[2] = 0x80 | (norm & 0x3F);',
+			'} else if(norm <= 0x10FFFF) {',
+			'	str->bytes = 4;',
+			'	str->data = GC_MALLOC(5);',
+			'	str->data[0] = 0xF0 | (norm >> 18);',
+			'	str->data[1] = 0x80 | ((norm >> 12) & 0x3F);',
+			'	str->data[2] = 0x80 | ((norm >> 6) & 0x3F);',
+			'	str->data[3] = 0x80 | (norm & 0x3F);',
+			'} else {',
+			'	str->len = str->bytes = 0;',
+			'	str->data = GC_MALLOC(1);',
+			'}',
+			'str->data[str->bytes] = 0;',
+			'return &(str->header);'
+		]
+	});
 	//7FFFFFFFFFFFFFFF
 	//01234567890123456789
 	intObj.addMessage('hex', {
diff -r b01d7c1b4edd -r a4c2b31acba7 samples/stringops.tp
--- a/samples/stringops.tp	Tue Jul 22 18:39:00 2014 -0700
+++ b/samples/stringops.tp	Tue Jul 22 21:31:41 2014 -0700
@@ -21,5 +21,6 @@
 		print: (string: ("12abcDEF" parseHex32)) . "\n"
 		print: (string: ("FFFFFFFFFF" parseHex64)) . "\n"
 		print: "'" . (" \nfoobar baz " ltrim) . "'\n"
+		print: (72 utf8) . (101 utf8) . (108 utf8) . (108 utf8) . (111 utf8) . (0x1F604 utf8) . "\n"
 	}
 }