|
|
@ -287,6 +287,97 @@ local function getNextCharUtf8(word)
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
--- Tells whether a string contains at least one multi-byte UTF-8 character.
-- The check compares the two length measures exposed on Grichelde.F: the
-- plain length and the UTF-8 aware length. Whenever they disagree, some
-- character in the string occupies more than one byte.
-- @param word the string to inspect
-- @return true if the two lengths differ, false otherwise
local function isUtf8MultiByte(word)
    local byteCount = Grichelde.F.length(word)
    local charCount = Grichelde.F.lengthUtf8(word)
    return byteCount ~= charCount
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
--- Builds the escaped byte sequence for a single UTF-8 character.
-- The result is the literal text "%z" followed by one "\<decimal byte>"
-- group per byte of the character, e.g. "%z\195\164" for a two-byte
-- character whose bytes are 195 and 164.
--
-- Background: whole strings can be split into UTF-8 sequences with
--     for uchar in string.gmatch(ustring, "([%z\1-\127\194-\244][\128-\191]*)") do ... end
-- (this simply skips over most invalid codes).
--
-- @param word a string holding exactly one UTF-8 character
--             (Grichelde.F.lengthUtf8(word) must be 1)
-- @return the escaped sequence as a string, or nil if word is nil, is not a
--         string, is not exactly one character long, starts with a byte that
--         is not a valid UTF-8 lead byte (including 0), or is truncated
local function getUtf8Sequence(word)
    if (word == nil) or (Grichelde.F.type(word) ~= "string") or (Grichelde.F.lengthUtf8(word) ~= 1) then
        return nil
    end

    local c1 = Grichelde.F.toByte(word, 1)
    if (c1 == nil) then
        return nil
    end

    -- Classify the lead byte BEFORE reading any further bytes, so that a
    -- missing byte is never concatenated (that would raise an error).
    local continuationBytes
    if (c1 > 0) and (c1 <= 127) then
        -- UTF8-1
        continuationBytes = 0
    elseif (c1 >= 194) and (c1 <= 223) then
        -- UTF8-2
        continuationBytes = 1
    elseif (c1 >= 224) and (c1 <= 239) then
        -- UTF8-3
        continuationBytes = 2
    elseif (c1 >= 240) and (c1 <= 244) then
        -- UTF8-4
        continuationBytes = 3
    else
        -- 0, 128-193 and 245-255 do not start a supported sequence
        return nil
    end

    local sequence = "%z\\" .. c1
    for index = 2, continuationBytes + 1 do
        -- NOTE(review): toByte is assumed to yield nil past the end of the
        -- string (as string.byte does) - confirm against Grichelde.F.
        local byte = Grichelde.F.toByte(word, index)
        if (byte == nil) then
            -- truncated sequence: report it as invalid instead of erroring
            return nil
        end
        sequence = sequence .. "\\" .. byte
    end

    return sequence
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
--- Builds a table with one escaped entry per byte of a single UTF-8 character.
-- The first entry is the literal text "%z\<decimal lead byte>"; every
-- continuation byte of the character follows as its own "\<decimal byte>"
-- entry, in order. A three-byte character therefore yields three entries and
-- a four-byte character four, matching the bytes getUtf8Sequence emits.
--
-- Background: whole strings can be split into UTF-8 sequences with
--     for uchar in string.gmatch(ustring, "([%z\1-\127\194-\244][\128-\191]*)") do ... end
-- (this simply skips over most invalid codes).
--
-- @param word a string holding exactly one UTF-8 character
--             (Grichelde.F.lengthUtf8(word) must be 1)
-- @return a table of escaped byte strings; only the lead byte entry when the
--         lead byte is outside 194-244; nil if word is nil, is not a string,
--         is not exactly one character long, or is truncated
local function getUtf8Table(word)
    if (word == nil) or (Grichelde.F.type(word) ~= "string") or (Grichelde.F.lengthUtf8(word) ~= 1) then
        return nil
    end

    local c1 = Grichelde.F.toByte(word, 1)
    if (c1 == nil) then
        return nil
    end

    local tbl = {}
    Grichelde.F.tInsert(tbl, "%z\\" .. c1)

    -- How many continuation bytes the lead byte announces. Any other lead
    -- byte (ASCII or unsupported) contributes only its own entry.
    local continuationBytes = 0
    if (c1 >= 194) and (c1 <= 223) then
        -- UTF8-2
        continuationBytes = 1
    elseif (c1 >= 224) and (c1 <= 239) then
        -- UTF8-3
        continuationBytes = 2
    elseif (c1 >= 240) and (c1 <= 244) then
        -- UTF8-4
        continuationBytes = 3
    end

    -- Append EVERY continuation byte, not just the last one, so that no byte
    -- of a three- or four-byte character is lost.
    for index = 2, continuationBytes + 1 do
        -- NOTE(review): toByte is assumed to yield nil past the end of the
        -- string (as string.byte does) - confirm against Grichelde.F.
        local byte = Grichelde.F.toByte(word, index)
        if (byte == nil) then
            -- truncated sequence: report it as invalid instead of erroring
            return nil
        end
        Grichelde.F.tInsert(tbl, "\\" .. byte)
    end

    return tbl
end
|
|
|
|
|
|
|
|
|
|
|
|
local function isLetter(word)
|
|
|
|
local function isLetter(word)
|
|
|
|
local char = Grichelde.F.getNextCharUtf8(word)
|
|
|
|
local char = Grichelde.F.getNextCharUtf8(word)
|
|
|
|
return (char ~= nil) and (Grichelde.F.toUpper(char) ~= Grichelde.F.toLower(char))
|
|
|
|
return (char ~= nil) and (Grichelde.F.toUpper(char) ~= Grichelde.F.toLower(char))
|
|
|
@ -418,6 +509,9 @@ Grichelde.F = {
|
|
|
|
toUpper = _G.strupper,
|
|
|
|
toUpper = _G.strupper,
|
|
|
|
toLower = _G.strlower,
|
|
|
|
toLower = _G.strlower,
|
|
|
|
getNextCharUtf8 = getNextCharUtf8,
|
|
|
|
getNextCharUtf8 = getNextCharUtf8,
|
|
|
|
|
|
|
|
isUtf8MultiByte = isUtf8MultiByte,
|
|
|
|
|
|
|
|
getUtf8Sequence = getUtf8Sequence,
|
|
|
|
|
|
|
|
getUtf8Table = getUtf8Table,
|
|
|
|
isLetter = isLetter,
|
|
|
|
isLetter = isLetter,
|
|
|
|
isNumber = isNumber,
|
|
|
|
isNumber = isNumber,
|
|
|
|
isUpper = isUpper,
|
|
|
|
isUpper = isUpper,
|
|
|
|