Moduł:Xlat
Wygląd
Dokumentacja modułu
[stwórz] [ ]
Zobacz podstrony tego modułu.
|
--- Translates `text` one "character" at a time using context-sensitive rules.
--
-- A "character" is one code point followed by every subsequent code point that
-- is flagged in the `combining` table of 'Moduł:IPA/data'. For each character
-- the longest leading run of code points that is a key of `translation`
-- becomes its `known` part; the remaining code points become its `rest`.
-- A five-slot window (after2, after1, current, before1, before2 -- in text
-- order, oldest first) gives every character up to two neighbours of context
-- on each side.
--
-- @param text string to translate; returned unchanged when nil or empty
-- @param translation table mapping a known part to a list of rules. A rule is
--        a table whose [1] is the replacement, whose optional `a` is a pattern
--        matched against the known parts of the two characters preceding it in
--        the text, and whose optional `b` is a pattern matched against the two
--        following ones. The first rule whose patterns all match wins.
-- @param preserveCombining when truthy, the `rest` of a character is emitted
--        after its translation, provided that translation is not empty
-- @return the translated string
local function xlat(text, translation, preserveCombining)
    if not text or not translation or (#text == 0) then
        return text
    end

    -- Per-call memo: rules and pattern-match outcomes for every string seen.
    local cache = {}

    local function createCacheEntry(char)
        local entry = {
            -- unknown strings translate to themselves
            rules = translation[char] or { { char } },
            class = {},
        }
        cache[char] = entry
        return entry
    end

    local function loadRules(char)
        return (cache[char] or createCacheEntry(char)).rules
    end

    local function checkRule(char, restriction)
        local cached = cache[char] or createCacheEntry(char)
        local result = cached.class[restriction]
        if result == nil then
            result = mw.ustring.match(char, restriction) and true or false
            -- remember the outcome so the same context/pattern pair is matched once
            cached.class[restriction] = result
        end
        mw.log("RULE << "..char.." >> match << "..restriction.." >> "..(result and "SUCCESS" or "FAIL"))
        return result
    end

    local function translate(after, current, before)
        -- translate the current character
        local rules = loadRules(current)
        for _, v in ipairs(rules) do
            local a = not v.a or checkRule(after, v.a)
            local b = not v.b or checkRule(before, v.b)
            if a and b then
                return v[1]
            end
        end
        return current
    end

    local combining = mw.loadData( 'Moduł:IPA/data' ).combining
    local output = {}

    -- Appends a translation and, when requested, the combining marks that
    -- belonged to the very character that was translated.
    local function emit(translated, rest)
        table.insert(output, translated)
        if (#translated > 0) and preserveCombining then
            table.insert(output, rest)
        end
    end

    local nothing = { known = "", rest = "" }
    local after2 = nothing
    local after1 = nothing
    local current = nothing
    local before1 = nothing
    local before2 = nothing

    local iterator = mw.ustring.gcodepoint(text)
    mw.log("X start")
    local codepoint = iterator()
    mw.log("X next: "..(codepoint and string.format("U+%04X", codepoint) or "<NIL>"))
    while codepoint do
        -- read next "character" from input: a base code point plus combining ones
        local char = { codepoint }
        codepoint = iterator()
        mw.log("X next: "..(codepoint and string.format("U+%04X", codepoint) or "<NIL>"))
        while codepoint and combining[codepoint] do
            table.insert(char, codepoint)
            codepoint = iterator()
            mw.log("X next: "..(codepoint and string.format("U+%04X", codepoint) or "<NIL>"))
        end

        -- determine the largest known part of the freshly read character
        local known, rest
        for j = #char, 1, -1 do
            local part = mw.ustring.char(unpack(char, 1, j))
            if translation[part] then
                known = part
                rest = j < #char and mw.ustring.char(unpack(char, j + 1, #char)) or ""
                break
            end
        end
        if not known then
            -- use whole unrecognized character
            known = mw.ustring.char(unpack(char))
            rest = ""
        end

        -- shift window
        after2 = after1
        after1 = current
        current = before1
        before1 = before2
        before2 = { known = known, rest = rest }

        if current.known ~= "" then
            local preceding = after2.known..after1.known
            local following = before1.known..before2.known
            local translated = translate(preceding, current.known, following)
            mw.log("X trans: "..preceding.." - "..current.known.." - "..following.." → "..(translated or "<NIL>"))
            emit(translated, current.rest)
        end
    end

    -- translate last characters: the two window slots that never reached `current`
    local preceding = after1.known..current.known
    local translated = translate(preceding, before1.known, before2.known)
    mw.log("X trans: "..preceding.." - "..before1.known.." - "..before2.known.." → "..(translated or "<NIL>"))
    emit(translated, before1.rest)

    preceding = current.known..before1.known
    translated = translate(preceding, before2.known, "")
    mw.log("X trans: "..preceding.." - "..before2.known.." - ".." → "..(translated or "<NIL>"))
    emit(translated, before2.rest)

    mw.log("X stop")
    local joined = table.concat(output, "")
    mw.log("return \""..joined.."\"")
    return joined
end
--- Loads one transformation table for a language from 'Moduł:Xlat/<lang>'.
--
-- @param lang language code; selects the data subpage
-- @param name key of the table inside the data module
-- @return the table and its model name ("gsub" when the first value found by
--         pairs() is a string, "xlat" when it is a table); nil when an
--         argument is missing, the data module or the table cannot be loaded,
--         or the table is empty or starts with any other value type
local function loadXlatData(lang, name)
    if not lang or (#lang == 0) then
        mw.log("Brak kodu języka")
        return nil
    end
    if not name then
        mw.log("Brak wyboru tabeli")
        -- nothing to look up; continuing would fail when the missing name is
        -- concatenated into the error message below
        return nil
    end

    local dataModule = 'Moduł:Xlat/'..lang
    local success, loaded = pcall(mw.loadData, dataModule)
    if not success or not loaded then
        mw.log("Błąd ładowania modułu dla języka: "..lang)
        return nil
    end

    local data = loaded[name]
    if not data then
        mw.log("Błąd ładowania tabeli '"..name.."' dla języka: "..lang)
        return nil
    end

    -- The model is inferred from the type of a single entry only.
    local model = false
    for _, v in pairs(data) do
        local t = type(v)
        if t == "string" then
            model = "gsub"
        elseif t == "table" then
            model = "xlat"
        end
        break
    end
    if not model then
        return nil
    end
    return data, model
end
--- Decomposes precomposed Hangul syllables in `text` into conjoining jamo.
--
-- Follows the arithmetic decomposition from the Unicode Standard:
-- http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf#G24646
-- Code points outside the syllable block are copied through unchanged.
--
-- @param text input string
-- @return string in which every code point in SBase .. SBase + SCount - 1 is
--         replaced by its leading consonant, vowel and (if any) trailing
--         consonant
local function jamo(text)
    local SBase = 0xAC00
    local LBase = 0x1100
    local VBase = 0x1161
    local TBase = 0x11A7
    local LCount = 19
    local VCount = 21
    local TCount = 28
    local NCount = VCount * TCount
    local SCount = LCount * NCount -- 11172

    -- Every code point is encoded on its own and the pieces are joined at the
    -- end; unpacking the whole output into one call would exceed the limit on
    -- the number of unpacked values for long inputs.
    local pieces = {}
    for S in mw.ustring.gcodepoint(text) do
        local SIndex = S - SBase
        if (0 <= SIndex) and (SIndex < SCount) then
            local L = LBase + math.floor(SIndex / NCount)
            local V = VBase + math.floor((SIndex % NCount) / TCount)
            local T = TBase + SIndex % TCount
            if T ~= TBase then
                pieces[#pieces + 1] = mw.ustring.char(L, V, T)
                mw.log("S:"..S.." -> L:"..L..", V:"..V..", T:"..T)
            else
                -- T == TBase means the syllable has no trailing consonant
                pieces[#pieces + 1] = mw.ustring.char(L, V)
                mw.log("S:"..S.." -> L:"..L..", V:"..V)
            end
        else
            pieces[#pieces + 1] = mw.ustring.char(S)
            mw.log("S:"..S)
        end
    end
    return table.concat(pieces)
end
--- Normalizes `text` according to the requested decomposition method.
--
-- @param text string to normalize
-- @param method one of:
--        nil or "NFC" -- compose (mw.ustring.toNFC)
--        true or "NFD" -- decompose (mw.ustring.toNFD)
--        false -- leave the text untouched
--        "JAMO" -- split Hangul syllables with jamo()
-- @return the normalized string
-- Raises an error for any other method value.
local function decompose(text, method)
    -- explicit opt-out: hand the text back as it came
    if method == false then
        return text
    end
    -- composite characters are the default
    if method == nil or method == "NFC" then
        return mw.ustring.toNFC(text)
    end
    if method == true or method == "NFD" then
        return mw.ustring.toNFD(text)
    end
    if method == "JAMO" then
        return jamo(text)
    end
    error("unsupported decomposition method: "..method)
end
--- Shared implementation of the exported entry points.
--
-- Reads the text (first positional argument) and the `lang` argument from the
-- invoking frame, falling back to the parent frame, loads the table named
-- `method` for that language and applies it.
--
-- @param frame Scribunto frame object
-- @param method name of the table to use inside the language data module
-- @return the transformed text in NFC, or nothing when the text is missing or
--         empty, no data could be loaded, or the model is not supported
local function transform(frame, method)
    -- getParent() may return nil; fall back to an empty argument list
    local parent = frame:getParent()
    local parentArgs = parent and parent.args or {}

    local text = frame.args[1] or parentArgs[1]
    if not text or (#text == 0) then
        mw.log("Brak tekstu")
        return
    end

    local lang = frame.args.lang or parentArgs.lang
    local data, model = loadXlatData(lang, method)
    if not data then
        -- lang (or method) may be nil at this point, so convert before concatenating
        mw.log("Brak przepisu transformacji '"..tostring(method).."' dla języka "..tostring(lang))
        return
    end
    mw.log(model.." "..text)

    if model == "gsub" then
        -- plain per-code-point substitution on the decomposed text
        text = mw.ustring.toNFD(text)
        local result = mw.ustring.gsub(text, ".", data)
        return mw.ustring.toNFC(result)
    end

    if model == "xlat" then
        -- data[0], when present, carries the options for the table
        local options = data[0]
        local keepAccents = options and options.keepAccents or false
        -- an explicit `accents` argument overrides the option from the table
        local accents = frame.args.accents or parentArgs.accents
        if accents then
            keepAccents = accents == "preserve"
        end
        text = decompose(text, options and options.decompose or false)
        local result = xlat(text, data, keepAccents)
        return mw.ustring.toNFC(result)
    end

    mw.log("Nieobsługiwany model transformacji '"..model.."' dla języka "..lang)
end
-- Public interface of the module: each entry point applies the table of the
-- same name from the language data module.
local p = {}

function p.transliteracja(frame)
    return transform(frame, "transliteracja")
end

function p.transkrypcja(frame)
    return transform(frame, "transkrypcja")
end

return p