04.05.2017, 23:17
(This post was last modified: 05.05.2017, 00:00 by Erwin van der Zwart.)
Hi,
You probably need the octal Unicode code points, but this script outputs the decimal, octal and HTML-entity representations of a UTF-8 string:
Try this:
BR,
Erwin
You probably need the octal Unicode code points, but this script outputs the decimal, octal and HTML-entity representations of a UTF-8 string:
Try this:
Code:
--- Decode a UTF-8 string into code points and format them three ways.
--
-- The returned table has six entries:
--   [1] array of code points as numbers
--   [2] the decimal code points concatenated into one string (no separator)
--   [3] array of octal strings, each prefixed with '0'
--   [4] the octal strings concatenated into one string
--   [5] array of HTML numeric entities ('&#NNN;')
--   [6] the HTML entities concatenated into one string
--
-- Sequences of 1 to 6 bytes are accepted, as in the original implementation.
-- Only plain arithmetic is used, so no bit-operation library is required.
--
-- @param utf8str string  UTF-8 encoded input (may be empty)
-- @return table          six-element table described above
-- Raises "invalid UTF-8 character sequence" on an invalid lead byte, a
-- missing/invalid continuation byte, or a sequence truncated at the end
-- of the input.
function utf8tounicode(utf8str)
  assert(type(utf8str) == "string")

  local INVALID = "invalid UTF-8 character sequence"
  -- Modulus that strips the length-marker bits from a lead byte, indexed by
  -- sequence length (same payload mask as 2^(8-len) - 1).
  local LEAD_MODULUS = { 128, 64, 32, 16, 8, 4 }

  local res = {}
  local remaining = 0 -- continuation bytes still expected
  local val

  for i = 1, #utf8str do
    local c = string.byte(utf8str, i)
    if remaining == 0 then
      local len
      if c < 0x80 then
        len = 1
      elseif c < 0xC0 then
        -- 0x80..0xBF is a continuation byte and cannot start a sequence.
        error(INVALID, 2)
      elseif c < 0xE0 then
        len = 2
      elseif c < 0xF0 then
        len = 3
      elseif c < 0xF8 then
        len = 4
      elseif c < 0xFC then
        len = 5
      elseif c < 0xFE then
        len = 6
      else
        error(INVALID, 2)
      end
      val = c % LEAD_MODULUS[len]
      remaining = len - 1
    else
      if c < 0x80 or c > 0xBF then
        error(INVALID, 2)
      end
      -- Shift in the 6 payload bits of the continuation byte.
      val = val * 64 + (c % 64)
      remaining = remaining - 1
    end
    if remaining == 0 then
      res[#res + 1] = val
    end
  end

  if remaining ~= 0 then
    -- Input ended in the middle of a multi-byte sequence.
    error(INVALID, 2)
  end

  local resoct, reshtml = {}, {}
  for idx = 1, #res do
    local cp = res[idx]
    resoct[idx] = '0' .. string.format("%o", cp)
    reshtml[idx] = string.format("&#%d;", cp)
  end

  return {
    res, table.concat(res),
    resoct, table.concat(resoct),
    reshtml, table.concat(reshtml),
  }
end
-- Example usage: convert a sample string, log the complete result table,
-- then log each of its six entries in order:
--   result[1]  table with separate dec results
--   result[2]  concatenated dec result table as string
--   result[3]  table with separate oct results
--   result[4]  concatenated oct result table as string
--   result[5]  table with separate html results
--   result[6]  concatenated html result table as string
mytext = 'åæø'
result = utf8tounicode(mytext)
log(result)
for part = 1, 6 do
  log(result[part])
end
BR,
Erwin