Module:Scripts/charToScript

-- Table that collects the functions this module makes available to callers.
local subexport = {}

-- Copied from Module:Unicode data.
local floor = math.floor

-- Binary search for the range that contains a codepoint.
--
-- codepoint: number to look up.
-- ranges: array of ranges sorted in ascending order, where each range is an
--         array whose first element is the lower bound and whose second
--         element is the upper bound (both inclusive). If the table has a
--         "length" field it is used as the element count; otherwise the
--         count is obtained from Module:table.
--
-- Returns the matching range and its index in ranges. If no range contains
-- the codepoint, returns nil and the last index that was probed (which is
-- nil when ranges is empty).
local function binaryRangeSearch(codepoint, ranges)
	local low, mid, high
	low, high = 1, ranges.length or require("Module:table").length(ranges)
	while low <= high do
		mid = floor((low + high) / 2)
		local range = ranges[mid]
		if codepoint < range[1] then
			-- Target lies below this range: continue in the lower half.
			high = mid - 1
		elseif codepoint <= range[2] then
			return range, mid
		else
			-- Target lies above this range: continue in the upper half.
			low = mid + 1
		end
	end
	return nil, mid
end

-- Copied from Module:Unicode data.
--
-- Linear search for the range that contains a codepoint.
--
-- codepoint: number to look up.
-- ranges: array of ranges sorted in ascending order, where each range is an
--         array whose first element is the lower bound and whose second
--         element is the upper bound (both inclusive).
--
-- Returns the matching range, or nothing if no range contains the codepoint.
local function linearRangeSearch(codepoint, ranges)
	for i, range in ipairs(ranges) do
		if codepoint < range[1] then
			-- The ranges are sorted, so every later range starts even higher
			-- and cannot contain the codepoint either.
			break
		elseif codepoint <= range[2] then
			return range
		end
	end
end

-- Ordering predicate for ranges: true when the first range begins at a lower
-- value than the second, judged only by each range's first element.
local function compareRanges(left, right)
	local leftStart = left[1]
	local rightStart = right[1]
	return leftStart < rightStart
end

-- Save previously used codepoint ranges in case another character is in the
-- same range.
local rangesCache = {}

--[=[
	Takes a codepoint or a character and finds the script code (if any) that is
	appropriate for it based on the codepoint, using the data module
	Module:scripts/recognition data. The data module was generated from the
	patterns in Module:scripts/data using Module:User:Erutuon/script recognition.

	Parameters:
		char	a string containing exactly one character, or a number that is
				used directly as the codepoint.

	Converts the character to a codepoint. Returns a script code if the
	codepoint is in the list of individual characters, if a previously matched
	range contains it, if the index of its 4096-character block falls in one of
	the ranges listed under "blocks" in the data, or if it is in one of the
	ranges defined for that block; else returns "None".

	Raises an error if char is a string that is empty or holds more than one
	character, or if char is neither a string nor a number.
]=]
local charToScriptData
function subexport.charToScript(char)
	-- Load the recognition data on first use only.
	charToScriptData = charToScriptData
		or mw.loadData("Module:scripts/recognition data")

	local t = type(char)
	local codepoint
	if t == "string" then
		local etc
		codepoint, etc = mw.ustring.codepoint(char, 1, 2)
		-- A second codepoint means more than one character; no first
		-- codepoint means the string was empty. Reject both up front so an
		-- empty string does not fail later with an unrelated nil error.
		if etc or not codepoint then
			error("bad argument #1 to 'charToScript' (expected a single character)")
		end
	elseif t == "number" then
		codepoint = char
	else
		error(("bad argument #1 to 'charToScript' (expected string or a number, got %s)")
			:format(t))
	end

	local individualMatch = charToScriptData.individual[codepoint]
	if individualMatch then
		return individualMatch
	end

	-- Try the ranges matched by earlier calls before searching the data.
	local range
	if rangesCache[1] then
		range = linearRangeSearch(codepoint, rangesCache)
		if range then
			return range[3]
		end
	end

	-- Index of the 4096-codepoint block that the codepoint belongs to.
	local index = floor(codepoint / 0x1000)

	-- First look the block index up in the list of block ranges; only when
	-- that fails, search the ranges stored for this particular block.
	range = linearRangeSearch(index, charToScriptData.blocks)
	if not range and charToScriptData[index] then
		range = binaryRangeSearch(codepoint, charToScriptData[index])
		if range then
			-- Remember the range and keep the cache sorted, because
			-- linearRangeSearch stops at the first range that starts above
			-- the codepoint.
			table.insert(rangesCache, range)
			table.sort(rangesCache, compareRanges)
		end
	end

	return range and range[3] or "None"
end

--[=[
	Finds the script code that occurs most often among the characters of a
	string.

	Parameters:
		text	the string to examine. It is split with a byte pattern that
				matches one lead byte followed by any continuation bytes,
				i.e. one UTF-8 encoded character at a time.

	Each character is classified with charToScript and the results are
	tallied; whatever charToScript returns (including "None") is counted like
	any other script code. Returns the code with the highest tally. When
	several codes share the highest tally, the one whose first character
	appears earliest in the text is returned, so the result does not depend on
	table traversal order. Returns nil if the text yields no characters.
]=]
function subexport.findBestScriptWithoutLang(text)
	local counts = {}
	-- Script codes in order of first appearance, used for stable tie-breaking.
	local seenOrder = {}
	for character in text:gmatch("[%z\1-\127\194-\244][\128-\191]*") do
		local script = subexport.charToScript(character)
		local count = counts[script]
		if count then
			counts[script] = count + 1
		else
			counts[script] = 1
			seenOrder[#seenOrder + 1] = script
		end
	end

	local bestScript
	local greatestCount = 0
	for _, script in ipairs(seenOrder) do
		local count = counts[script]
		-- Strict comparison keeps the earliest-seen script on a tie.
		if count > greatestCount then
			bestScript = script
			greatestCount = count
		end
	end

	return bestScript
end

-- Hand the table of exported functions to whoever loads this module.
return subexport