-- Module:R:Woodhouse

--[[ This module looks up the Greek entry title in "Module:R:Woodhouse/psia1_to_infs", a list of verbs, and adds any infinitive forms to a list that also includes the page title itself, in both its potential proper and common forms. This list of Greek forms is used to retrieve English headwords in Woodhouse's dictionary from "Module:R:Woodhouse/reverse_index". The infinitives step is necessary because Woodhouse mostly identifies verbs by their infinitives. The function get_page then looks up each English headword alphabetically in "Module:R:Woodhouse/page_headwords", or "Module:R:Woodhouse/page_headwords_proper", or both, per "Module:R:Woodhouse/proper_or_both", to find page numbers in the paper edition of the dictionary. This "abstract" page number is adjusted slightly, with a few exceptions, to obtain the page number used in the ARTFL project's URL. These URLs are returned with bibliographical information and HTML for display. A remaining issue is that, about 5% of the time, the page number will be off by one or two. This problem may be somewhat intricate to solve completely.

8 May 2020: Changed reverse_index implementation to reverse_index_tab_delimited implementation to save memory (The Lua table consumed 8x the binary text size.) ]]

-- Table holding the functions this module makes public; it is returned at the end of the file.
local export = {}

--- Return a new list with repeated values dropped.
-- The first occurrence of each value keeps its relative position; the input
-- list is left untouched.
-- @param ls  list to filter (walked with ipairs)
-- @return    a fresh list containing each distinct value once
local function remove_duplicates(ls)
	local seen = {}
	local unique = {}
	for _, item in ipairs(ls) do
		if not seen[item] then
			seen[item] = true
			unique[#unique + 1] = item
		end
	end
	return unique
end

--- Build a new list from the values of `l1` followed by the values of `l2`.
-- Either argument may be nil (or false), in which case it contributes nothing.
-- Values are collected with pairs, so keys of the inputs are discarded.
-- @param l1  first table, or nil
-- @param l2  second table, or nil
-- @return    a fresh list; neither input is modified
local function concat(l1, l2)
	local merged = {}

	-- Append every value of `src` to `merged`, skipping absent tables.
	local function append_all(src)
		if not src then
			return
		end
		for _, value in pairs(src) do
			merged[#merged + 1] = value
		end
	end

	append_all(l1)
	append_all(l2)
	return merged
end

--- Binary search for the lower bound of `x` in an ascending list.
-- Narrows the index range until the bounds meet and returns that index:
-- the smallest index i in [L, H) whose element is not less than `x`, or H
-- when every element in that range is less than `x`.
-- @param ys  list sorted in ascending order under `<`
-- @param x   value to locate; must be comparable with the elements of `ys`
-- @param L   optional lowest index to consider (defaults to 1)
-- @param H   optional highest index to consider (defaults to #ys)
-- @return    the index at which the search converged
-- Raises an error if `L` is negative.
local function b_search(ys, x, L, H)
	L = L or 1
	H = H or #ys
	if L < 0 then
		error('L < 0')
	end
	while L < H do
		-- Declared local so the midpoint does not leak into the global
		-- environment as a stray variable named M.
		local M = math.floor((L + H) / 2)
		if ys[M] < x then
			L = M + 1
		else
			H = M
		end
	end
	return L
end

-- Comment for calculate_page_number_divergence:
-- page_number_divergence results from a few buffer pages before the first
-- headword and, subsequently, from a few multi-page entries that leave some
-- pages without headwords. Each time the latter takes place, the index of
-- page-initial headwords drifts from the physical page number.
--
-- @param w0         headword to compare against the threshold words below
-- @param is_proper  truthy to use the fixed offset for proper headwords
-- @return           4 plus an offset of 1 to 4
local function calculate_page_number_divergence(w0, is_proper)
	local drift
	if is_proper then
		-- Proper headwords always take the largest offset; w0 is not examined.
		drift = 4
	elseif w0 >= 'taking' then
		drift = 4
	elseif w0 >= 'setting' then
		drift = 3
	elseif w0 >= 'putrefaction' then
		drift = 2
	else
		drift = 1
	end
	return 4 + drift
end

--- Compute the page number for the English headword `w`.
-- A headword counts as proper when upper-casing its first letter leaves it
-- unchanged. Proper headwords are searched in `page_headwords_proper` and
-- shifted by 995; all others are searched in `page_headwords`. In both cases
-- the search runs on the headword with its first letter lower-cased, and the
-- result of calculate_page_number_divergence is added.
-- NOTE(review): both searches start at index 0, so b_search can read index 0
-- of the list for words sorting at or before the first entry -- confirm the
-- data tables define that index.
-- @param page_headwords         list searched for common headwords
-- @param page_headwords_proper  list searched for proper headwords
-- @param w                      English headword
-- @return                       page number
local function get_page(page_headwords, page_headwords_proper, w)
	local english = mw.getLanguage('en')
	local is_proper = english:ucfirst(w) == w
	local w0 = english:lcfirst(w)
	local p_n_divergence = calculate_page_number_divergence(w0, is_proper)

	if is_proper then
		local index = b_search(page_headwords_proper, w0, 0, 32)
		return (index - 1) + 995 + p_n_divergence
	end

	local index = b_search(page_headwords, w0, 0, 995)
	return (index - 1) + p_n_divergence
end

--- Upper-case the first letter of `x` via the English language object.
-- @param x  string to capitalize
-- @return   whatever the language object's ucfirst returns for `x`
local function uc1_eng(x)
	local english = mw.getLanguage('en')
	return english:ucfirst(x)
end

--- Collect the English headwords listed for `title` in a table-based index.
-- For each headword found under `reverse_index[title]`:
--   * no entry in `proper_or_both`: the headword is added unchanged;
--   * an entry exists: the capitalized form is added, and when the entry is
--     "b" the unchanged form is added as well.
-- @param proper_or_both  table keyed by headword; the value "b" requests both forms
-- @param reverse_index   table mapping a title to a table of headwords
-- @param title           key to look up
-- @return                list of headwords (empty when `title` is not indexed)
local function f_reverse_index(proper_or_both, reverse_index, title)
	local headwords_eng = {}
	local rix = reverse_index[title]
	if rix == nil then
		return headwords_eng
	end

	for _, headword in pairs(rix) do
		local pbc = proper_or_both[headword]
		if pbc ~= nil then
			headwords_eng[#headwords_eng + 1] = uc1_eng(headword)
			if pbc == "b" then
				headwords_eng[#headwords_eng + 1] = headword
			end
		else
			headwords_eng[#headwords_eng + 1] = headword
		end
	end
	return headwords_eng
end

--- Collect the English headwords listed for `title` in the tab-delimited index.
-- The index is scanned for lines consisting of the (pattern-escaped) title,
-- a tab, and the rest of the line; that remainder is split on tabs and every
-- non-empty piece is returned.
-- @param proper_or_both               accepted but not used by this function
-- @param reverse_index_tab_delimited  string holding the whole index
-- @param title                        text that must open a matching line
-- @return                             list of headwords, in order of appearance
local function f_reverse_index_tab_delimited(proper_or_both, reverse_index_tab_delimited, title)
	local escaped_title = require("Module:utilities").pattern_escape(title)
	-- The frontier patterns pin the match to the start and end of a line
	-- (or of the whole string).
	local line_pattern = "%f[^%z\n]" .. escaped_title .. "\t([^\n]+)%f[%z\n]"

	local headwords_eng = {}
	for English_words in reverse_index_tab_delimited:gmatch(line_pattern) do
		for word in English_words:gmatch("[^\t]+") do
			headwords_eng[#headwords_eng + 1] = word
		end
	end
	return headwords_eng
end

--- Look up `w` in the "grc_RWoodhouse_lemma_to_infinitives" data table.
-- @param w  key to look up
-- @return   whatever "Module:data tables" index_table yields for that key
local function load_infinitives(w)
	-- Earlier implementation, kept for reference:
	-- return mw.loadData("Module:R:Woodhouse/psia1_to_infs")[w]
	return require("Module:data tables").index_table("grc_RWoodhouse_lemma_to_infinitives", w)
end

--grc_RWoodhouse_lemma_to_headwords

--- Build the output text for the entry `title`.
-- Gathers the forms to look up (the title, the title with its first letter
-- upper-cased, and whatever load_infinitives returns), collects the English
-- headwords indexed under each form, removes duplicates, sorts them, and
-- emits one "<headword> idem, page <n>." item per headword.
-- @param title  entry title to look up
-- @return       the concatenated items, with a single space before and after
local function print_html(title)
	--local psia1_to_infs = mw.loadData("Module:R:Woodhouse/psia1_to_infs")  --for calculating title_addenda
	--local reverse_index = mw.loadData("Module:R:Woodhouse/reverse_index")  --for f_reverse_index
	local reverse_index_tab_delimited = require("Module:R:Woodhouse/reverse_index_tab_delimited") --for f_reverse_index_tab_delimited
	local proper_or_both = mw.loadData("Module:R:Woodhouse/proper_or_both") --for f_reverse_index
	local page_headwords = mw.loadData("Module:R:Woodhouse/page_headwords") --for get_page
	-- page_headwords also requires a select_all function in data_tables to work efficiently
	local page_headwords_proper = mw.loadData("Module:R:Woodhouse/page_headwords_proper") --for get_page

	-- getContentLanguage must be called to obtain the language object.
	local title_uc = mw.getContentLanguage():ucfirst(title)
	local title_addenda = load_infinitives(title) --psia1_to_infs[title]
	local titles = concat({title, title_uc}, title_addenda)

	local headwords_eng = {}
	-- The loop variable has its own name so it does not shadow `title`.
	for _, form in ipairs(titles) do
		-- changed reverse_index to reverse_index_tab_delimited
		headwords_eng = concat(
			headwords_eng,
			f_reverse_index_tab_delimited(proper_or_both, reverse_index_tab_delimited, form)
		)
	end
	headwords_eng = remove_duplicates(headwords_eng)
	table.sort(headwords_eng)

	local lst = {}
	local count = 0
	-- ipairs guarantees the sorted order is the order of output.
	for _, headword in ipairs(headwords_eng) do
		local nPage = get_page(page_headwords, page_headwords_proper, headword)
		lst[#lst + 1] = headword .. " idem, page " .. (nPage - 5) .. "."
		count = count + 1
	end

	-- NOTE(review): expandtext is computed but never used in the visible
	-- code; presumably it belonged to output markup that is not present
	-- here -- confirm before removing.
	local expandtext = count .. " headword" .. (count == 1 and "" or "s")

	table.insert(lst, 1, " ")
	table.insert(lst, " ")
	return table.concat(lst)
end

--- Entry point: produce the Woodhouse lookup text for a page.
-- Reads the parameter `w` from the parent frame's arguments and falls back
-- to the current page's title when `w` is absent or empty. When no `w` is
-- given and the current page is in the Template namespace, an empty string
-- is returned instead of performing a lookup.
-- @param frame  frame object; its parent frame supplies the arguments
-- @return       the text built by print_html, or "" on a template page without `w`
function export.reverse_index(frame)
	-- getParent and getCurrentTitle are functions and must be called.
	local args = frame:getParent().args
	local current_title = mw.title.getCurrentTitle()

	-- Treat an empty `w` the same as a missing one, so the title fallback
	-- agrees with the namespace check below.
	local w = args['w']
	if w == "" then
		w = nil
	end

	if not w and current_title.nsText == "Template" then
		return ""
	end
	return print_html(w or current_title.text)
end

-- Expose the module's public functions.
return export