- additional methods for multibyte UTF-8 character detection
This commit is contained in:
parent
396e140102
commit
7d9b30fff2
@ -3,6 +3,10 @@ All notable changes to this project will be documented in this file.
|
||||
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
||||
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
||||
|
||||
## Version 1.1.7 - 2023-xx-yy
|
||||
### Fixed
|
||||
- handling of German umlauts (and other Unicode characters)
|
||||
|
||||
## Version 1.1.6 - 2023-01-18
|
||||
### Changed
|
||||
- bumped version for latest WotLK Classic patch
|
||||
|
@ -287,6 +287,97 @@ local function getNextCharUtf8(word)
|
||||
end
|
||||
end
|
||||
|
||||
--- Reports whether the two length measures of `word` disagree.
-- Compares Grichelde.F.length(word) with Grichelde.F.lengthUtf8(word).
-- NOTE(review): presumably these are the byte count and the UTF-8 character
-- count, so a mismatch means at least one multi-byte character is present;
-- confirm against the definitions of both helpers.
-- @param word value handed unchanged to both length helpers
-- @return true when the two lengths differ, false otherwise
local function isUtf8MultiByte(word)
    local plainLength = Grichelde.F.length(word)
    local utf8Length = Grichelde.F.lengthUtf8(word)
    return plainLength ~= utf8Length
end
|
||||
|
||||
--- Builds a textual byte sequence for a single UTF-8 character.
-- The result starts with the literal text "%z" followed by one
-- backslash-prefixed decimal byte value per byte of the character,
-- e.g. a two-byte character with bytes 195 and 164 yields the text
-- %z\195\164 (backslashes are literal characters, not escapes).
--
-- The lead byte is classified BEFORE any further byte is read, so an invalid
-- lead byte (0, 128-193, 245-255) or a truncated sequence returns nil instead
-- of raising a "concatenate a nil value" error.
--
-- For reference, UTF-8 sequences in a string can be iterated with
-- (this will simply skip over most invalid codes):
--     for uchar in string.gmatch(ustring, "([%z\1-\127\194-\244][\128-\191]*)") do
--         ...
--     end
--
-- @param word string expected to hold exactly one UTF-8 character
--             (checked via Grichelde.F.lengthUtf8(word) == 1)
-- @return the sequence string, or nil if `word` is nil, not a string, not
--         exactly one character long, or not a well-formed lead byte/sequence
local function getUtf8Sequence(word)
    if ((word == nil) or (Grichelde.F.type(word) ~= "string") or (Grichelde.F.lengthUtf8(word) ~= 1)) then
        return nil
    end

    -- NOTE(review): Grichelde.F.toByte is presumably string.byte and returns
    -- nil for an index past the end of the string - confirm.
    local c1 = Grichelde.F.toByte(word, 1)
    if (c1 == nil) then
        return nil
    end

    -- determine the expected number of bytes from the lead byte alone
    local byteCount
    if (c1 > 0) and (c1 <= 127) then
        -- UTF8-1
        byteCount = 1
    elseif (c1 >= 194) and (c1 <= 223) then
        -- UTF8-2
        byteCount = 2
    elseif (c1 >= 224) and (c1 <= 239) then
        -- UTF8-3
        byteCount = 3
    elseif (c1 >= 240) and (c1 <= 244) then
        -- UTF8-4
        byteCount = 4
    else
        -- NUL, stray continuation byte or lead byte outside the UTF-8 range
        return nil
    end

    local sequence = "%z\\" .. c1
    for index = 2, byteCount do
        local byte = Grichelde.F.toByte(word, index)
        if (byte == nil) then
            -- truncated multi-byte sequence
            return nil
        end
        sequence = sequence .. "\\" .. byte
    end

    return sequence
end
|
||||
|
||||
--- Splits a single UTF-8 character into a table of textual byte entries.
-- The first entry is the literal text "%z" followed by a backslash and the
-- decimal value of the lead byte; every continuation byte follows, in order,
-- as its own entry consisting of a backslash and the decimal byte value.
--
-- Previously only the LAST byte of 3- and 4-byte characters was added, which
-- dropped the bytes in between; all continuation bytes are now included so
-- the entries match what getUtf8Sequence concatenates.
--
-- For reference, UTF-8 sequences in a string can be iterated with
-- (this will simply skip over most invalid codes):
--     for uchar in string.gmatch(ustring, "([%z\1-\127\194-\244][\128-\191]*)") do
--         ...
--     end
--
-- @param word string expected to hold exactly one UTF-8 character
--             (checked via Grichelde.F.lengthUtf8(word) == 1)
-- @return table of entries as described above; for a lead byte outside the
--         multi-byte ranges only the first entry is returned; nil if `word`
--         is nil, not a string, not exactly one character long, or if an
--         expected continuation byte is missing
local function getUtf8Table(word)
    if ((word == nil) or (Grichelde.F.type(word) ~= "string") or (Grichelde.F.lengthUtf8(word) ~= 1)) then
        return nil
    end

    -- NOTE(review): Grichelde.F.toByte is presumably string.byte and returns
    -- nil for an index past the end of the string - confirm.
    local c1 = Grichelde.F.toByte(word, 1)
    if (c1 == nil) then
        return nil
    end

    -- number of continuation bytes announced by the lead byte
    local continuationCount = 0
    if (c1 >= 194) and (c1 <= 223) then
        -- UTF8-2
        continuationCount = 1
    elseif (c1 >= 224) and (c1 <= 239) then
        -- UTF8-3
        continuationCount = 2
    elseif (c1 >= 240) and (c1 <= 244) then
        -- UTF8-4
        continuationCount = 3
    end

    local tbl = {}
    Grichelde.F.tInsert(tbl, "%z\\" .. c1)

    for index = 2, continuationCount + 1 do
        local byte = Grichelde.F.toByte(word, index)
        if (byte == nil) then
            -- truncated multi-byte sequence
            return nil
        end
        Grichelde.F.tInsert(tbl, "\\" .. byte)
    end

    return tbl
end
|
||||
|
||||
local function isLetter(word)
|
||||
local char = Grichelde.F.getNextCharUtf8(word)
|
||||
return (char ~= nil) and (Grichelde.F.toUpper(char) ~= Grichelde.F.toLower(char))
|
||||
@ -418,6 +509,9 @@ Grichelde.F = {
|
||||
toUpper = _G.strupper,
|
||||
toLower = _G.strlower,
|
||||
getNextCharUtf8 = getNextCharUtf8,
|
||||
isUtf8MultiByte = isUtf8MultiByte,
|
||||
getUtf8Sequence = getUtf8Sequence,
|
||||
getUtf8Table = getUtf8Table,
|
||||
isLetter = isLetter,
|
||||
isNumber = isNumber,
|
||||
isUpper = isUpper,
|
||||
|
@ -72,7 +72,7 @@ function Grichelde:RunTests()
|
||||
["OSSO"] = "OCHO",
|
||||
["ooSS"] = "ooCH",
|
||||
["schmeissen"] = "chmeichen",
|
||||
["Schön"] = "Chön",
|
||||
["Schön"] = "Chön",
|
||||
}
|
||||
)
|
||||
ok = ok + o
|
||||
@ -776,6 +776,56 @@ function Grichelde:RunTests()
|
||||
ok = ok + o
|
||||
all = all + a
|
||||
|
||||
o, a = test(
|
||||
"umlauts",
|
||||
{
|
||||
replacement_10 = {
|
||||
order = 10,
|
||||
searchText = "ä",
|
||||
replaceText = "ae",
|
||||
exactCase = false,
|
||||
consolidate = false,
|
||||
matchWhen = 2,
|
||||
stopOnMatch = false,
|
||||
},
|
||||
replacement_11 = {
|
||||
order = 11,
|
||||
searchText = "ö",
|
||||
replaceText = "oe",
|
||||
exactCase = false,
|
||||
consolidate = false,
|
||||
matchWhen = 2,
|
||||
stopOnMatch = false,
|
||||
},
|
||||
replacement_12 = {
|
||||
order = 12,
|
||||
searchText = "ü",
|
||||
replaceText = "ue",
|
||||
exactCase = false,
|
||||
consolidate = false,
|
||||
matchWhen = 2,
|
||||
stopOnMatch = false,
|
||||
},
|
||||
replacement_13 = {
|
||||
order = 13,
|
||||
searchText = "ß",
|
||||
replaceText = "ss",
|
||||
exactCase = false,
|
||||
consolidate = false,
|
||||
matchWhen = 2,
|
||||
stopOnMatch = false,
|
||||
},
|
||||
},
|
||||
{
|
||||
["Hallä"] = "Hallae",
|
||||
["Ätsch"] = "Aetsch",
|
||||
["Hällöleü"] = "Haelloeleue",
|
||||
["ÜöÄ"] = "UeoeAe",
|
||||
}
|
||||
)
|
||||
ok = ok + o
|
||||
all = all + a
|
||||
|
||||
if (ok == all) then
|
||||
self:PrefixedPrint("All %d tests %s", all, cGreen("passed"))
|
||||
else
|
||||
|
Loading…
x
Reference in New Issue
Block a user