-- Module:Sandbox/trappist the monk/harv link test

require('Module:No globals');

local citerefs = {};															-- CITEREF anchors found in the article; filled by add_citeref()

-- lua patterns that locate the opening of a {{harvc}} template or one of its redirects
local redirect_patterns_harvc = {
	'{{%s*[Hh]arvc',
	'{{%s*[Cc]itec',
	}

-- lowercase names of {{harvc}} and its redirects
local redirects_harvc = {
	['harvc'] = true,
	['citec'] = true,
	}

-- lua patterns that locate the opening of vcite-family templates and their redirects
local redirect_patterns_vcite = {
	'{{%s*[Vv]cite',
	'{{%s*[Vv]ancite',
	'{{%s*[Cc]it ',
	}

-- lowercase names of vcite-family templates and their redirects
local redirects_vcite = {
	['vcite book'] = true,
	['vancite book'] = true,
	['vancite report'] = true,
	['vcite encyclopedia'] = true,
	['vcite report'] = true,
	['vcite conference'] = true,
	['vancite conference'] = true,
	['vcite journal'] = true,
	['cit journal'] = true,
	['cit paper'] = true,
	['vancite journal'] = true,
	['vcite news'] = true,
	['vancite news'] = true,
	['vcite web'] = true,
	['vancite web'] = true,
	}

-- lowercase names of {{citation}} (cs2) and its redirects
local redirects_citation = {
	['citation'] = true,
	['cite'] = true,
	['cite citation'] = true,
	['cite study'] = true,
	['cite technical standard'] = true,
	}

-- lowercase names of {{cite patent}} and its redirects
local redirects_patent = {
	['cite patent'] = true,
	['citeref patent'] = true,
	['ref patent'] = true,
	}

-- lowercase names of {{sfnref}} and its redirects
local redirects_sfnref = {
	['sfnref'] = true,
	['harvid'] = true,
	}

-- lowercase names of {{date}} and its redirects
local redirects_date = {
	['date'] = true,
	['datetomos'] = true,
	['formatdate'] = true,
	['isotodmymdy'] = true,
	['isotomos'] = true,
	}

-- the name alias tables use pseudo-patterns in the same way as cs1|2; '#' represents the enumerator digit
local aliases_contributor = {
	'contributor#',
	'contributor-last#',
	'contributor#-last',
	'contributor-surname#',
	'contributor#-surname',
	}

local aliases_author = {
	'last#',
	'author#',
	'surname#',
	'author-last#',
	'author#-last',
	'subject#',
	'host#',
	}

local aliases_editor = {
	'editor#',
	'editor-last#',
	'editor#-last',
	'editor-surname#',
	'editor#-surname',
	}

local aliases_harvc_author = {
	'last#',
	'author#',
	}

local aliases_inventor = {														-- cite patent
	'inventor#',
	'inventor-last#',
	'inventor#-last',
	'inventor-surname#',
	'inventor#-surname',
	'invent#',
	'invent-#',
	}

local aliases_date = {															-- normal lua patterns
	'|%s*year%s*=%s*',
	'|%s*date%s*=%s*',
	'|%s*publication%-?date%s*=%s*',
	}

local aliases_harvc_date = {													-- normal lua patterns
	'|%s*anchor%-year%s*=%s*',
	'|%s*year%s*=%s*',
	}

local aliases_patent_date = {													-- normal lua patterns (cite patent)
	'|%s*issue%-date%s*=%s*',
	'|%s*gdate%s*=%s*',
	'|%s*publication%-date%s*=%s*',
	'|%s*pubdate%s*=%s*',
	}

local patterns_date = {															-- normal lua patterns; tried in this order
	'^(%d%d%d%d–%d%d%d%d%l?)$',													-- YYYY–YYYY four-digit year range; with or without dab
	'^(%d%d%d%d–%d%d%l?)$',														-- YYYY–YY two-digit year range; with or without dab
	'^(c%. %d%d%d%d?%l?)$',														-- three- or four-digit circa year; with or without dab
	'(%d%d%d%d?%l?)$',															-- three- or four-digit year at end of date (dmy or mdy); with or without dab
	'^(%d%d%d%d?%l?)',															-- three- or four-digit year at beginning of date (ymd or YYYY); with or without dab
	'^(n%.d%.%l?)$',															-- 'no date' with dots; with or without dab
	'^(nd%l?)$',																-- 'no date' without dots; with or without dab
	}

--[[--< S F N R E F _ G E T >--

Make a CITEREF from the contents of {{sfnref}} or {{harvid}}.  This function assumes that {{sfnref}} and {{harvid}}
are correctly formed.

template: the whole template as wikitext, for example {{sfnref|Smith|Jones|2000}}

returns 'CITEREF' followed by the concatenation of up to five positional parameters (nominally four names and a
date); nil when the template name is not listed in redirects_sfnref.

]]

local function sfnref_get (template)
	template = template:gsub ('{{%s*(.-)%s*}}', '%1');							-- strip bounding template markup and trim
	local parts = mw.text.split (template, '%s*|%s*');							-- split at the pipe and remove extraneous space characters

	if not redirects_sfnref[parts[1]:lower()] then								-- lower() must be called; an uncalled method is never a key in the table
		return nil;																-- not an sfnref or harvid template
	end

	local citeref = {'CITEREF'};												-- pieces of the anchor; [1] is always the prefix
	for i = 2, #parts do														-- parts[1] is the template name so start at 2
		if 6 == #citeref then													-- prefix plus five positional parameters (four names and a date)
			break;
		end
		if not parts[i]:find ('=') then											-- equal sign marks a named parameter; these templates don't support named parameters
			citeref[#citeref + 1] = parts[i];									-- positional parameters are saved
		end
	end

	return table.concat (citeref, '');
end

--[[--< D A T E _ G E T >--

Extract the year portion of a date from the first date parameter named in aliases that holds a value.  Does not
error check (that is left to the cs1|2 templates to do).

Also gets the date from |<param>={{date|...}} (or a redirect of {{date}}); the first positional parameter of that
template is taken to be the date.

template: the citation or harvc template as wikitext
aliases: sequence of lua patterns, each matching '|<param>=' for one date parameter; tried in order

returns the year (with dab letter, circa prefix, or year range as matched by patterns_date); empty string (for
concatenation) when there is no date parameter, when the date is in an unrecognized format, or when the date
parameter holds a template other than {{date}}.

]]

local function date_get (template, aliases)
	local text = tostring (template);											-- tostring because something makes match think template is a table

	for _, alias in ipairs (aliases) do											-- spin through the date alias patterns
		if text:match (alias) then												-- is this |<param>= used?
			local rvalue = text:match (alias .. '(%b{})');						-- is the assigned value a template?

			if rvalue then
				rvalue = rvalue:gsub ('{{%s*(.-)%s*}}', '%1');					-- strip bounding template markup and trim
				local parts = mw.text.split (rvalue, '%s*|%s*');				-- split at the pipe and remove extraneous space characters

				if not redirects_date[parts[1]:lower()] then					-- lower() must be called for the lookup to work
					return '';													-- |date= holds some other template than {{date}} or redirect
				end
				rvalue = parts[2];												-- assume that date template is properly formed, first positional parameter is the date
			else
				rvalue = text:match (alias .. '([^|}]+)');
				if rvalue then
					rvalue = mw.text.trim (rvalue);
				end
				if '' == rvalue then											-- trimmed to nothing
					rvalue = nil;												-- ensure that it is unset so we can try the next parameter in the list
				end
			end

			if rvalue then
				for _, date_pattern in ipairs (patterns_date) do				-- spin through the recognized date formats
					local date = rvalue:match (date_pattern);					-- attempt to extract year portion according to the pattern
					if date then
						return date;
					end
				end
				break;															-- found a date but it was malformed so abandon
			end
		end
	end

	return '';																	-- no date param or date param doesn't hold a recognized date; empty string for concatenation
end

--[[--< V N A M E S _ G E T >--

Extract names from |vauthors= or |veditors=; there is no |vcontributors= parameter.

Splits the v parameter value at the comma; correctly handles accept-as-written markup when it is used to wrap
comma-separated names (corporate), for example ((Black, Brown, White, and Co.)).

params: table of template parameters; key is the parameter name
vparam: name of the parameter to read ('vauthors' or 'veditors')

returns the concatenation of the first four names (surnames only for names without accept-as-written markup;
whole name with the markup removed otherwise); nil when the parameter is missing.

]]

local function vnames_get (params, vparam)
	local vnames = {};															-- first four author or editor names go here

	if params[vparam] then														-- test for |vauthors= or |veditors=
		local split = mw.text.split (params[vparam], '%s*,%s*');				-- this will separate portions of ((Black, Brown, White, and Co.))

		local i = 1;															-- indexer into split{}
		while split[i] do
			local name = split[i];
			if name:match ('^%(%(.*[^%)][^%)]$') then							-- first segment of comma-separated accept-as-written; has the opening doubled parens only
				i = i + 1;														-- bump indexer to next segment
				while split[i] do
					name = name .. ', ' .. split[i];							-- concatenate with previous segments
					if split[i]:match ('^.*%)%)$') then							-- this segment has the closing doubled parens
						break;													-- so done reassembling
					end
					i = i + 1;
				end
			end
			vnames[#vnames + 1] = name;
			i = i + 1;

			if 4 == #vnames then												-- limit to four names; count names, not segments, because a
				break;															-- reassembled accept-as-written name spans more than one segment
			end
		end

		for k, vname in ipairs (vnames) do
			if not vname:match ('%(%(.-%)%)') then								-- without accept-this-value-as-written markup
				vname = vname:gsub ('(.-)%s+%u+$', '%1');						-- drop trailing initials; keep surname(s)
			end
			vnames[k] = vname:gsub ('%(%((.-)%)%)', '%1');						-- remove markup if present and save the whole name
		end
	end

	return 0 ~= #vnames and table.concat (vnames) or nil						-- return a concatenation of the vnames; nil else
end

--[[--< N A M E S _ G E T >

cs1|2 makes CITEREF anchor from contributor, author, or editor name-lists in that order

Get the names for one name-list from the template's parameters.  The caller chooses the name-list by passing the
matching alias table and tries the next list when this function returns nil.

params: table of template parameters; key is the parameter name
aliases_list: sequence of pseudo-patterns where '#' stands for the enumerator (1-4); for enumerator 1 the
	un-enumerated form is also accepted (|last= and |last1= are exact aliases; |last1= is tested first).  When
	more than one alias is set for the same enumerator, the alias listed last wins.

returns concatenated names in enumeration order when successful; nil else

missing names (missing |lastn= parameter) are omitted but the other names are included.

]]

local function names_get (params, aliases_list)
	local names = {};															-- names keyed by enumerator 1-4; has holes when a |lastn= is missing

	for _, alias in ipairs (aliases_list) do
		for enum = 1, 4 do
			local value = params[(alias:gsub ('#', enum))];						-- replace '#' to make 'lastn'
			if not value and 1 == enum then
				value = params[(alias:gsub ('#', ''))];							-- replace '#' to make 'last'
			end
			if value then
				names[enum] = value;
			end
		end
	end

	local name_list = {};														-- names{} without the holes so that concat sees every name
	for enum = 1, 4 do
		if names[enum] then
			name_list[#name_list + 1] = (names[enum]:gsub ('%(%((.-)%)%)', '%1'));	-- remove accept-as-written markup if present
		end
	end

	if 0 == #name_list then
		return nil;
	end
	return table.concat (name_list);
end

--[[--< T E M P L A T E _ S T R I P >--

Removes the citation or harvc template's {{ and }} markup then removes, in whole, any templates found inside the
citation or harvc template.

Templates are not allowed in parameters that are made part of COinS metadata; yet, they will appear.  cs1|2 does
not see the template markup but instead sees the result of the template as html.  cs1|2 strips the html which
leaves the displayed value for the CITEREF.  We can't do that here so, because templates aren't allowed in
parameters, we simply discard any templates found in the cs1|2 template.

This may leave a |lastn= parameter empty which will be treated as if it were really empty as cs1|2 do (three
authors, |last2= empty -> CITEREFLast1Last3YYYY; the harv and sfn render: 'Last1, & Last3 YYYY' with
CITEREFLast1Last3YYYY).

template: the template as wikitext

returns the template text without its outer delimiters and without any inner templates

]]

local function template_strip (template)
	template = template:gsub ('^{{', ''):gsub ('}}$', '', 1);					-- remove outer {{ and }} (cs1|2 template delimiters)
	template = template:gsub ('%b{}', '');										-- remove any templates from the cs1|2 template
	return template;
end

--[[--< E S C A P E _ L U A _ M A G I C _ C H A R S >--

Returns a string where all of lua's magic characters have been escaped.  This is important because functions like
string.gsub treat their pattern and replace strings as patterns, not literal strings.

argument: the string to escape

returns argument with each of ^ $ ( ) % . [ ] * + - ? prefixed with %

]]

local function escape_lua_magic_chars (argument)
	argument = argument:gsub ("%%", "%%%%");									-- replace % with %%; done first so the % added below are not doubled
	argument = argument:gsub ("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1");			-- replace all other lua magic pattern characters
	return argument;
end

--[=[-< W I K I L I N K _ S T R I P >--

Wikilink markup does not belong in CITEREF and can / does confuse the code that parses apart citation and harvc
templates so here we remove any wiki markup:
	[[link|label]] -> label
	[[link]] -> link

template: the template as wikitext

returns the template text with every wikilink replaced by its label (piped link) or its link (simple link)

]=]

local function wikilink_strip (template)
	-- a function replacement is used so that the replacement text is taken literally; percent-encoded
	-- characters in the link or label need no escaping
	return (template:gsub ('%[%b[]%]', function (wikilink)
		return wikilink:match ('%[%[.-|(.-)%]%]')								-- label from complex [[link|label]] wikilink
			or wikilink:match ('%[%[(.-)%]%]');									-- link from simple [[link]] wikilink
	end));
end

--[[--< T E M P L A T E _ N A M E _ G E T >

Return the citation or harvc template's name; convert to lower case and trim trailing whitespace (leading
whitespace is skipped by the pattern).

When the template is a sandbox the subpage portion of the template name is omitted from the returned template
name: {{Cite book/new |...}} returns cite book

template: the template as wikitext

returns the lowercase template name; nil when no name can be found (possibly corrupt template)

]]

local function template_name_get (template)
	local template_name = template:match ('{{%s*([^/|}]+)');					-- get template name; ignore subpages ~/new, ~/sandbox; stop at }} for templates without parameters
	if not template_name then
		return nil;																-- could not get template name from (possibly corrupt) template; extraneous opening { mid template can cause this
	end

	template_name = template_name:gsub ('%s*$', '');							-- trim whitespace
	return template_name:lower();												-- and lowercase only
end

--[[--< T E M P L A T E _ P A R A M S _ G E T >

Parse apart a template's parameters and store in the params table where key is the parameter's name and value is
the parameter's value; empty parameters (including those that hold only whitespace) are not saved.

template: the template as wikitext
params: table to fill; modified in place

no return value

]]

local function template_params_get (template, params)
	template = wikilink_strip (template);										-- because piped wikilinks confuse code that builds params{} and because plain wikilinks not allowed in CITEREF
	-- strip templates after getting |ref= value because |ref={{sfnref}} and |ref={{harvid}} are allowed
	template = template_strip (template);										-- because template markup can confuse code that builds params{} and because templates in name parameters are not allowed

	template = template:gsub ('|%s*|', '|');									-- when pipe follows pipe with or without white space, remove extraneous pipe

	for param, value in template:gmatch ('|%s*([^=|]-)%s*=%s*([^|}]+)') do		-- parameter names cannot hold a pipe so don't let the name span one
		value = mw.text.trim (value);											-- trim first; the pattern always captures at least one character, possibly whitespace
		if '' ~= value then														-- don't add if value trimmed to an empty string
			params[param] = value;
		end
	end
end

--[[--< C I T E R E F _ M A K E >--

Make the CITEREF anchor id for one cs1|2, cs1|2-like, or harvc template.

inspect |ref= to decide what to do:
	|ref=harv									- get names and date from template parameters
	|ref={{SfnRef|name|name|name|name|year}}	- assemble CITEREF from {{sfnref}} positional parameters
	|ref={{Harvid|name|name|name|name|year}}	- assemble CITEREF from {{harvid}} positional parameters
	|ref=none									- skip; do nothing because CITEREF intentionally suppressed; TODO: keep with a type code of '0'?
	|ref=										- empty (or holding only whitespace) or missing
													for cs1: skip
														if |mode=cs2: spoof |ref=harv
													for cs2: get names and date from template parameters
														if |mode=cs1: skip
	|ref=<text>									- save param value because may match CITEREF override value in {{harv}} template |ref= parameter or {{harvc}} |id= parameter

|ref= is ignored for {{harvc}} unless it holds a template; {{harvc}} uses |id= as written when set, names and date
otherwise.  cite LSA always creates CITEREF using only |last= and |year=.

template: the whole template as wikitext

returns the anchor text; nil when the template does not create an anchor that can be determined here

]]

local function citeref_make (template)
	local ref;																	-- content of |ref=
	local citeref;																-- the assembled CITEREF from this template
	local date;
	local params = {};															-- table of cs1|2 parameters

	local template_name = template_name_get (template);							-- get lowercase trimmed template name; ignore subpages ~/new, ~/sandbox
	if not template_name then
		return nil;																-- could not extract template name from (possibly corrupted) template (extraneous opening { in the template will cause this)
	end

	local is_harvc = redirects_harvc[template_name];
	local is_patent = redirects_patent[template_name];

	if is_harvc then															-- get date; done here, before templates are stripped, because might be in {{date}}
		date = date_get (template, aliases_harvc_date);
	elseif is_patent then
		date = date_get (template, aliases_patent_date);
	else
		date = date_get (template, aliases_date);
	end

	ref = template:match ('|%s*ref%s*=%s*(%b{})');								-- first look for |ref={{sfnref}} or |ref={{harvid}} because we will strip templates from the cs1|2 template
	if not ref and not is_harvc then											-- |ref={{template}} not found; ignore |ref= when template is {{harvc}}
		ref = template:match ('|%s*ref%s*=([^|}]+)');							-- nil when |ref=|... or when |ref=}} or when |ref= is missing
		if ref then
			ref = mw.text.trim (ref);											-- something, could be just whitespace
			if '' == ref then
				ref = nil;														-- make whitespace-only |ref= the same as an empty or missing |ref=
			end
		end

		if not ref then															-- here when |ref= missing or empty
			if redirects_citation[template_name] then							-- could be cs2
				if template:match ('|%s*mode%s*=%s*cs1') then
					return nil;													-- citation template but |mode=cs1
				end
				ref = 'harv';													-- spoof to handle cs2 as if it were cs1 with |ref=harv
			elseif template:match ('|%s*mode%s*=%s*cs2') then
				ref = 'harv';													-- not a cs2 template but |mode=cs2; spoof as if it were cs1 with |ref=harv
			end
		end
	end

	template_params_get (template, params);										-- build a table of template parameters and their values

	if not ref then																-- |ref= not set, might be cite LSA which doesn't support |ref=
		if 'cite lsa' == template_name then
			return 'CITEREF' .. (params.last or '') .. (params.year or '');		-- cite LSA always creates CITEREF using only |last= and |year= (no aliases); either may be missing
		end

		if is_harvc then
			if params.id then
				return params.id;												-- |id= value as written
			end
			citeref = names_get (params, aliases_harvc_author);					-- get the harvc contributor names
			if citeref then
				citeref = 'CITEREF' .. citeref .. date;
			end
			return citeref;
		end

		return nil;																-- not cite LSA or harvc so done
	end

	if 'harv' == ref then														-- |ref=harv
		if is_patent then
			citeref = names_get (params, aliases_inventor);						-- inventor names only
		else																	-- cs1|2 template
			citeref = names_get (params, aliases_contributor) or				-- get contributor, author, or editor names
				names_get (params, aliases_author) or
				vnames_get (params, 'vauthors') or								-- |vauthors=
				names_get (params, aliases_editor) or
				vnames_get (params, 'veditors');								-- |veditors=
		end

		if citeref then															-- if names were gotten
			citeref = 'CITEREF' .. citeref .. date;
		end
	elseif ref:match ('%b{}') then												-- ref holds a template
		citeref = sfnref_get (ref);												-- returns content of {{sfnref}} or {{harvid}}; nil else
	elseif 'none' == ref and not is_patent then									-- |ref=none; not supported by cite patent
		return nil;																-- CITEREF explicitly suppressed
	elseif '' ~= ref then														-- |ref=<text>
		citeref = ref;															-- may match CITEREF override value in {{harv}} template |ref= parameter or {{harvc}} |id= parameter
	end

	return citeref;																-- citeref text; nil else
end

--[[--< A D D _ C I T E R E F >

Adds a citeref to the citerefs table; no return value.

citeref: anchor text; nil is accepted and ignored
citerefs: table to update; key is the anchor after mw.uri.anchorEncode(), value is 1 the first time the anchor is
	added and 2 thereafter (multiple citations share the anchor)

]]

local function add_citeref (citeref, citerefs)
	if not citeref then															-- no CITEREF was extracted
		return;
	end

	citeref = mw.uri.anchorEncode (citeref);									-- encode to remove wikimarkup, convert spaces to underscores etc
	citerefs[citeref] = citerefs[citeref] and 2 or 1;							-- 1: first time seen; 2: there are multiple same name/date citations
end

--[[--< T E M P L A T E S _ F O R E A C H >

Private helper for citeref_list_make().  Finds every place in content where pattern matches, extracts the
balanced {...} text that begins there, and passes that text to handler.  No return value.

]]

local function templates_foreach (content, pattern, handler)
	local tstart, tend = content:find (pattern);								-- find the first template
	while tstart do																-- nil when template not found
		local template = content:match ('%b{}', tstart);						-- get the whole template; nil when braces are not balanced
		if template then
			handler (template);
		end
		tstart, tend = content:find (pattern, tend);							-- search for another template
	end
end


--[[--< V C I T E _ C I T E R E F _ G E T >

Private helper for citeref_list_make().  Returns the anchor text for a vcite-family template; nil when none.

]]

local function vcite_citeref_get (template)
	local ref = template:match ('|%s*ref%s*=%s*(%b{})');						-- first look for |ref={{sfnref}} or |ref={{harvid}} because templates get stripped from the vcite template
	if ref then
		return sfnref_get (ref);												-- returns content of {{sfnref}} or {{harvid}}; nil else
	end

	local params = {};
	template_params_get (template, params);										-- build a table of template parameters and their values

	if params['ref'] then
		return params['ref'];													-- when both set, vcite uses value from |ref=
	end
	if params['harvid'] then
		return 'CITEREF' .. params['harvid'];									-- in vcite, |harvid= auto-adds 'CITEREF' prefix to the value in |harvid=
	end
	return nil;
end


--[[--< W I K I C I T E _ C I T E R E F _ G E T >

Private helper for citeref_list_make().  Returns the anchor text for a {{wikicite}} template; nil when the template
has neither |ref= nor |id=.

]]

local function wikicite_citeref_get (template)
	local ref = template:match ('|%s*ref%s*=%s*(%b{})');						-- first look for |ref={{sfnref}} or |ref={{harvid}}
	if ref then
		return sfnref_get (ref);
	end

	ref = template:match ('|%s*ref%s*=([^|}]+)');								-- plain-text |ref=
	if ref then
		return ref;
	end

	local id = template:match ('|%s*id%s*=%s*(%b{})');							-- |id= holds a template; used as written
	if id then
		return id;
	end

	id = template:match ('|%s*id%s*=([^|}]+)');									-- plain-text |id=
	if id then
		return 'Reference-' .. id;
	end
	return nil;
end


--[[--< C I T E R E F _ L I S T _ M A K E >

Makes a list of CITEREF anchors from cs1|2, cs1|2-like, vcite xxx, harvc, and wikicite templates.

Because cs1|2 wrapper templates can, and often do, hide |ref=, the author and date parameters inside the wrapper,
these parameters are not available in the article's wikisource so {{harv}}, {{sfn}}, and {{harvc}} templates that
link correctly to those wrapper templates will incorrectly show error messages.  Use |ignore-err=yes in the
{{harv}}, {{sfn}}, and {{harvc}} templates to suppress the error message.

returns the module's citerefs table (see add_citeref() for its layout); an empty string when the page has no
content.

]]

local function citeref_list_make ()
	local article_content = mw.title.getCurrentTitle():getContent() or '';		-- get the content of the article or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625
	article_content = article_content:gsub ('<nowiki>%s*{{.-}}%s*</nowiki>', '');	-- remove templates inside nowiki tags
	article_content = article_content:gsub ('<!%-%-.-%-%->', '');				-- remove html comments and their content

	if '' == article_content then												-- when there is no article content
		return '';																-- no point in continuing
	end

	local function citation_add (template)										-- cs1|2, cs1|2-like, and harvc templates
		add_citeref (citeref_make (template), citerefs);
	end

	templates_foreach (article_content, '{{%s*[Cc]it[ae]', citation_add);		-- cs1|2-like templates

	for _, pattern in ipairs (redirect_patterns_harvc) do						-- harvc templates and redirects
		templates_foreach (article_content, pattern, citation_add);
	end

	for _, pattern in ipairs (redirect_patterns_vcite) do						-- for each of the vcite family template base patterns
		templates_foreach (article_content, pattern, function (template)
			add_citeref (vcite_citeref_get (template), citerefs);
		end);
	end

	templates_foreach (article_content, '{{%s*[Ww]ikicite', function (template)	-- {{wikicite}} templates
		add_citeref (wikicite_citeref_get (template), citerefs);				-- anchor is computed fresh for every template so nothing carries over from the previous one
	end);

	mw.log (mw.dumpObject (citerefs))
	return citerefs;
end

----< E X P O R T E D _ T A B L E S >

return {
	citerefs = citeref_list_make (),											-- table of CITEREFs available in this article; the list is built when the module is loaded
	}