Module:Sandbox/trappist the monk/harv link test
Documentation for this module may be created at Module:Sandbox/trappist the monk/harv link test/doc
require('Module:No globals');
-- module-level accumulator for the anchors found in the article; add_citeref() stores each encoded anchor as a key
-- with value 1 (seen once) or 2 (seen more than once)
local citerefs = {};
-- lua patterns used by citeref_list_make() to locate the start of {{harvc}} templates and redirects in the wikitext
local redirect_patterns_harvc = {
'{{%s*[Hh]arvc',
'{{%s*[Cc]itec',
}
-- lowercase template names that citeref_make() treats as {{harvc}}
local redirects_harvc = {
['harvc'] = true,
['citec'] = true,
}
-- lua patterns used by citeref_list_make() to locate the start of vcite-family templates in the wikitext
local redirect_patterns_vcite = {
'{{%s*[Vv]cite',
'{{%s*[Vv]ancite',
'{{%s*[Cc]it ',
}
-- lowercase names of the vcite-family templates and redirects
-- NOTE(review): this table is not referenced by any code visible in this file - confirm whether it is still needed
local redirects_vcite = {
['vcite book'] = true,
['vancite book'] = true,
['vancite report'] = true,
['vcite encyclopedia'] = true,
['vcite report'] = true,
['vcite conference'] = true,
['vancite conference'] = true,
['vcite journal'] = true,
['cit journal'] = true,
['cit paper'] = true,
['vancite journal'] = true,
['vcite news'] = true,
['vancite news'] = true,
['vcite web'] = true,
['vancite web'] = true,
}
-- lowercase template names that citeref_make() treats as cs2 ({{citation}} and redirects) when |ref= is missing or empty
local redirects_citation = {
['citation'] = true,
['cite'] = true,
['cite citation'] = true,
['cite study'] = true,
['cite technical standard'] = true,
}
-- lowercase template names that citeref_make() treats as {{cite patent}}: inventor names and patent date aliases are used
local redirects_patent = {
['cite patent'] = true,
['citeref patent'] = true,
['ref patent'] = true,
}
-- lowercase template names that sfnref_get() accepts as {{sfnref}}
local redirects_sfnref = {
['sfnref'] = true,
['harvid'] = true,
}
-- lowercase template names that date_get() accepts as {{date}} when a date parameter holds a template
local redirects_date = {
['date'] = true,
['datetomos'] = true,
['formatdate'] = true,
['isotodmymdy'] = true,
['isotomos'] = true,
}
-- The alias tables below are consumed by names_get().  Each entry is a parameter name in which '#' stands for the
-- enumerator: names_get() replaces '#' with 1, 2, 3, and 4 and, for the first name, also tries the alias with '#' removed.
local aliases_contributor = { -- these use pseudo-patterns in the same way as cs1|2; '#' represents the enumerator digit
'contributor#',
'contributor-last#',
'contributor#-last',
'contributor-surname#',
'contributor#-surname',
}
local aliases_author = { -- author-name aliases; tried by citeref_make() after the contributor aliases
'last#',
'author#',
'surname#',
'author-last#',
'author#-last',
'subject#',
'host#',
}
local aliases_editor = { -- editor-name aliases; tried by citeref_make() after |vauthors=
'editor#',
'editor-last#',
'editor#-last',
'editor-surname#',
'editor#-surname',
}
local aliases_harvc_author = { -- name aliases used by citeref_make() for {{harvc}} templates
'last#',
'author#',
}
local aliases_inventor = { -- cite patent
'inventor#',
'inventor-last#',
'inventor#-last',
'inventor-surname#',
'inventor#-surname',
'invent#',
'invent-#',
}
-- The three alias tables below are passed to date_get(), which tries the patterns in the order listed; each pattern
-- matches a date parameter's name, the equal sign, and any white space that follows it.
local aliases_date = { -- normal lua patterns (cs1|2 templates)
'|%s*year%s*=%s*',
'|%s*date%s*=%s*',
'|%s*publication%-?date%s*=%s*',
}
local aliases_harvc_date = { -- normal lua patterns ({{harvc}})
'|%s*anchor%-year%s*=%s*',
'|%s*year%s*=%s*',
}
local aliases_patent_date = { -- normal lua patterns (cite patent)
'|%s*issue%-date%s*=%s*',
'|%s*gdate%s*=%s*',
'|%s*publication%-date%s*=%s*',
'|%s*pubdate%s*=%s*',
}
-- recognized date formats; date_get() tries these in the order listed and returns the capture of the first that matches
local patterns_date = { -- normal lua patterns
'^(%d%d%d%d–%d%d%d%d%l?)$', -- YYYY–YYYY four-digit year range; with or without dab
'^(%d%d%d%d–%d%d%l?)$', -- YYYY–YY two-digit year range; with or without dab
'^(c%. %d%d%d%d?%l?)$', -- three- or four-digit circa year; with or without dab
'(%d%d%d%d?%l?)$', -- three- or four-digit year at end of date (dmy or mdy); with or without dab
'^(%d%d%d%d?%l?)', -- three- or four-digit year at start of date (ymd or YYYY); with or without dab
'^(n%.d%.%l?)$', -- 'no date' with dots; with or without dab
'^(nd%l?)$', -- 'no date' without dots; with or without dab
}
--[[--------------------------< S F N R E F _ G E T >----------------------------------------------------------

Build a CITEREF anchor from the wikitext of a {{sfnref}} or {{harvid}} template.  The template is assumed to be
correctly formed.

Only positional parameters contribute to the anchor: anything that contains an equal sign is taken to be a named
parameter (which these templates do not support) and is skipped.  No more than five positional parameters
(nominally four names and a date) are used.

Returns the anchor as a string; nil when the template is neither {{sfnref}} nor {{harvid}}.

]]

local function sfnref_get (template)
	template = template:gsub ('{{%s*(.-)%s*}}', '%1');							-- drop the enclosing braces and surrounding white space
	local parts = mw.text.split (template, '%s*|%s*');							-- parts[1] is the template name; the rest are its parameters

	if not redirects_sfnref[parts[1]:lower()] then
		return nil;																-- some other template
	end

	local pieces = {'CITEREF'};													-- anchor prefix followed by up to five positional parameters
	for idx = 2, #parts do
		if 6 == #pieces then													-- prefix plus five parameters; that is all we take
			break;
		end
		local part = parts[idx];
		if not part:find ('=') then												-- named parameters are ignored
			pieces[#pieces + 1] = part;
		end
	end

	return table.concat (pieces, '');
end
--[[--------------------------< D A T E _ G E T >--------------------------------------------------------------

Extract the year portion of a template's date for use in a CITEREF anchor.

<template> is the wikitext of the whole template; <aliases> is a sequence of lua patterns, each of which matches a
date parameter's name and equal sign (see aliases_date, aliases_harvc_date, aliases_patent_date).  The aliases are
tried in the order given; the first one that is present with a non-empty value decides the result.  No error checking
is done on the date (that is left to the cs1|2 templates).

The value may be plain text or a {{date|...}} template (or redirect), in which case the first positional parameter
of that template is used as the date.

Returns the matched year text (with disambiguator letter when present); returns the empty string, suitable for
concatenation, when there is no date parameter, when the date is not in a recognized format, or when the date
parameter holds a template other than {{date}}.

]]

local function date_get (template, aliases)
	local text = tostring (template);											-- convert once so that every match() below works on a string

	for _, alias in ipairs (aliases) do											-- spin through the date alias patterns
		if text:match (alias) then												-- is this |<date alias>= used?
			local rvalue = text:match (alias .. '(%b{})');						-- is the assigned value a template?

			if rvalue then
				rvalue = rvalue:gsub ('{{%s*(.-)%s*}}', '%1');					-- strip bounding template markup and trim
				local parts = mw.text.split (rvalue, '%s*|%s*');				-- split at the pipe and remove extraneous space characters
				if not redirects_date[parts[1]:lower()] then
					return '';													-- |date= holds some template other than {{date}} or redirect
				end
				rvalue = parts[2];												-- assume {{date}} is properly formed: first positional parameter is the date
			else
				rvalue = text:match (alias .. '([^|}]+)');						-- plain-text value
				if rvalue then
					rvalue = mw.text.trim (rvalue);
					if '' == rvalue then
						rvalue = nil;											-- empty; unset so that the next alias in the list is tried
					end
				end
			end

			if rvalue then
				for _, date_pattern in ipairs (patterns_date) do				-- spin through the recognized date formats
					local year = rvalue:match (date_pattern);					-- attempt to extract the year portion
					if year then
						return year;
					end
				end
				break;															-- found a date but it was malformed so abandon
			end
		end
	end

	return '';																	-- no usable date; empty string for concatenation
end
--[[--------------------------< V N A M E S _ G E T >----------------------------------------------------------

Extract names from |vauthors= or |veditors=; there is no |vcontributors= parameter.

<params> is the table of template parameters; <vparam> is the name of the parameter to read.

The parameter value is split at the commas.  A name wrapped in accept-as-written markup, ((Black, Brown, White, and Co.)),
may itself contain commas, so its segments are joined back together and counted as one name.  No more than four names
are used; the limit is applied to the number of names, not to the number of comma-separated segments.

For names without accept-as-written markup the trailing white space and upper-case initials are removed so that only
the surname(s) remain; for names with the markup, the markup is removed and the rest is kept whole.

Returns the concatenated names; nil when the parameter is not present.

]]

local function vnames_get (params, vparam)
	local value = params[vparam];												-- |vauthors= or |veditors=
	if not value then
		return nil;
	end

	local segments = mw.text.split (value, '%s*,%s*');							-- this also separates the portions of ((Black, Brown, White, and Co.))
	local vnames = {};															-- first four author or editor names go here

	local i = 1;																-- indexer into segments{}
	while segments[i] and 4 > #vnames do										-- limit to four names
		local name = segments[i];
		if name:match ('^%(%(.*[^%)][^%)]$') then								-- opening doubled parens without the closing pair: first segment of a comma-separated accept-as-written name
			i = i + 1;
			while segments[i] do
				name = name .. ', ' .. segments[i];								-- reassemble
				if segments[i]:match ('^.*%)%)$') then							-- this segment has the closing doubled parens
					break;														-- so done reassembling
				end
				i = i + 1;
			end
		end
		vnames[#vnames + 1] = name;
		i = i + 1;
	end

	for k, vname in ipairs (vnames) do
		if vname:match ('%(%(.-%)%)') then										-- accept-as-written markup
			vnames[k] = vname:gsub ('%(%((.-)%)%)', '%1');						-- remove the markup and keep the whole name
		else
			vnames[k] = vname:gsub ('(.-)%s+%u+$', '%1');						-- drop the initials; keep the surname(s)
		end
	end

	return 0 ~= #vnames and table.concat (vnames) or nil;						-- a concatenation of the names; nil else
end
--[[--------------------------< N A M E S _ G E T >------------------------------------------------------------

Get the first four names of one name-list (contributor, author, editor, inventor, ...) from a template's parameters.

<params> is the table of template parameters; <aliases_list> is a sequence of parameter-name aliases in which '#'
stands for the enumerator (see aliases_author etc).

For each of the enumerators 1-4 every alias is tried; when more than one alias holds a value, the alias listed last
wins.  Because |last= and |last1= are exact aliases, the first name is looked for in |<alias>1= and then in the
un-enumerated |<alias>=.

Missing names (missing or empty |lastn= parameter) are omitted but the other names are included: |last1= and |last3=
without |last2= gives Last1Last3.  Accept-as-written markup, ((name)), is removed from the names.

Returns the names concatenated in enumeration order; nil when no names are found.

]]

local function names_get (params, aliases_list)
	local function param_get (key)												-- value assigned to <key>; nil when missing or empty
		local value = params[key];
		if value and '' ~= value then
			return value;
		end
	end

	local names = {};															-- names are appended here so that the table never has holes

	for enum = 1, 4 do
		local name;
		for _, alias in ipairs (aliases_list) do
			local value = param_get ((alias:gsub ('#', enum)));					-- replace '#' to make 'lastn'
			if not value and 1 == enum then
				value = param_get ((alias:gsub ('#', '')));						-- replace '#' to make 'last'
			end
			if value then
				name = value;													-- later aliases override earlier ones
			end
		end

		if name then
			names[#names + 1] = (name:gsub ('%(%((.-)%)%)', '%1'));				-- remove accept-as-written markup if present
		end
	end

	return 0 ~= #names and table.concat (names) or nil;							-- a concatenation of the names; nil else
end
--[[--------------------------< T E M P L A T E _ S T R I P >--------------------------------------------------

Remove the citation or harvc template's own {{ and }} markup and then remove, in whole, every template that remains
inside it.

Templates are not allowed in parameters that are made part of COinS metadata; yet, they will appear.  cs1|2 does not
see the template markup but instead sees the html that the template renders, strips that html, and uses the displayed
value for the CITEREF.  That cannot be done here so templates found inside the cs1|2 template are simply discarded.

Discarding a template may leave a |lastn= parameter empty; it is then treated as empty just as cs1|2 does (three
authors with |last2= empty -> CITEREFLast1Last3YYYY).

Returns the stripped text.

]]

local function template_strip (template)
	local opened = template:gsub ('^{{', '');									-- leading {{ of the cs1|2 template
	local inner = opened:gsub ('}}$', '', 1);									-- trailing }} of the cs1|2 template
	local stripped = inner:gsub ('%b{}', '');									-- whatever templates remain are discarded whole
	return stripped;
end
--[[--------------------------< E S C A P E _ L U A _ M A G I C _ C H A R S >----------------------------------

Return a copy of <argument> in which each of lua's magic pattern characters is preceded by a percent sign.  Needed
because string.gsub() gives special meaning to these characters instead of treating the text literally.

]]

local function escape_lua_magic_chars (argument)
	local percent_escaped = argument:gsub("%%", "%%%%");						-- percent signs first so that the ones added below are not doubled
	local escaped = percent_escaped:gsub("([%^%$%(%)%.%[%]%*%+%-%?])", "%%%1");	-- then all of the other magic characters
	return escaped;
end
--[=[-------------------------< W I K I L I N K _ S T R I P >--------------------------------------------------

Wikilink markup does not belong in a CITEREF and can confuse the code that parses apart citation and harvc templates
so every wikilink is replaced with its displayed text:
	[[link|label]] -> label
	[[link]] -> link

Returns the template text without wikilink markup.

]=]

local function wikilink_strip (template)
	local marker = '__57r1P__';													-- temporary stand-in for the wikilink being replaced

	for found in template:gmatch ('%[%b[]%]') do								-- for each wikilink in the text as it was when we started
		template = template:gsub ('%[%b[]%]', marker, 1);						-- put the marker where the first remaining wikilink is

		local text = found:match ('%[%[.-|(.-)%]%]')							-- label from complex [[link|label]] wikilink
			or found:match ('%[%[(.-)%]%]');									-- else link from simple [[link]] wikilink

		text = escape_lua_magic_chars (text);									-- in case there are percent-encoded characters in the wikilink
		template = template:gsub (marker, text, 1);								-- put the displayed text where the marker is
	end

	return template;
end
--[[--------------------------< T E M P L A T E _ N A M E _ G E T >--------------------------------------------

Return the name of the citation or harvc template in lower case without leading or trailing white space.  The name
ends at the first slash or pipe so, when the template is a sandbox, the subpage portion is not included:
	{{Cite book/new |...}} returns cite book

Returns nil when a name cannot be found (possibly corrupt template; an extraneous opening { mid template can cause this).

]]

local function template_name_get (template)
	local raw_name = template:match ('{{%s*([^/|]+)');							-- everything after {{ up to the first / or |
	if nil == raw_name then
		return nil;																-- no name to be had
	end

	local trimmed = raw_name:gsub ('%s*$', '');									-- trailing white space is not part of the name
	return trimmed:lower();
end
--[[--------------------------< T E M P L A T E _ P A R A M S _ G E T >----------------------------------------

Parse apart a template's parameters and store them in the <params> table where key is the parameter's name and value
is the parameter's trimmed value.  Empty parameters, including those that hold only white space, are not saved.

Wikilink markup and any templates inside <template> are removed before parsing, so a caller that needs
|ref={{sfnref}} or |ref={{harvid}} must fetch that value from the original template text.

No return value; <params> is modified in place.

]]

local function template_params_get (template, params)
	template = wikilink_strip (template);										-- because piped wikilinks confuse code that builds params{} and because plain wikilinks not allowed in CITEREF
	template = template_strip (template);										-- because template markup can confuse code that builds params{} and because templates in name parameters are not allowed

	local removed;
	repeat																		-- repeat because one pass over three or more adjacent pipes still leaves a doubled pipe
		template, removed = template:gsub ('|%s*|', '|');						-- when pipe follows pipe with or without white space, remove extraneous pipe
	until 0 == removed;

	for param, value in template:gmatch ('|%s*([^=]-)%s*=%s*([^|}]+)') do		-- build a table of template parameters and their values
		value = mw.text.trim (value);											-- trim before testing so that white-space-only values are recognized as empty
		if '' ~= value then
			params[param] = value;
		end
	end
end
--[[--------------------------< C I T E R E F _ M A K E >------------------------------------------------------

Make the CITEREF anchor that a single citation or harvc template creates.  <template> is the template's wikitext.

inspect |ref= to decide what to do:
	|ref=harv - get names and date from template parameters
	|ref={{SfnRef|name|name|name|name|year}} - assemble CITEREF from {{sfnref}} positional parameters
	|ref={{Harvid|name|name|name|name|year}} - assemble CITEREF from {{harvid}} positional parameters
	|ref=none - skip; do nothing because CITEREF intentionally suppressed (not supported by cite patent where 'none'
		is treated as <text>); TODO: keep with a type code of '0'?
	|ref= - empty (including white space only) or missing
		for cs1: skip
			if |mode=cs2: spoof |ref=harv
		for cs2: get names and date from template parameters
			if |mode=cs1: skip
	|ref=<text> - save param value because may match CITEREF override value in {{harv}} template |ref= parameter or
		{{harvc}} |id= parameter

special cases:
	{{harvc}} - plain-text |ref= is ignored; the anchor is |id= as written or else CITEREF + names + date
	{{cite LSA}} - does not support |ref=; the anchor is made from |last= and |year= only (no aliases)

Returns the anchor text; nil when the template does not create an anchor or when the template name cannot be read.

]]

local function citeref_make (template)
	local template_name = template_name_get (template);							-- lowercase trimmed template name; subpages ~/new, ~/sandbox ignored
	if not template_name then
		return nil;																-- (possibly corrupted) template; an extraneous opening { in the template will cause this
	end

	local is_harvc = redirects_harvc[template_name];
	local is_patent = redirects_patent[template_name];

	local date;																	-- get the date now, from the unstripped template, because it might be in {{date}}
	if is_harvc then
		date = date_get (template, aliases_harvc_date);
	elseif is_patent then
		date = date_get (template, aliases_patent_date);
	else
		date = date_get (template, aliases_date);
	end

	local ref = template:match ('|%s*ref%s*=%s*(%b{})');						-- first look for |ref={{sfnref}} or |ref={{harvid}} because templates will be stripped from the cs1|2 template
	if not ref and not is_harvc then											-- |ref={{template}} not found; plain-text |ref= is ignored when template is {{harvc}}
		ref = template:match ('|%s*ref%s*=([^|}]+)');							-- nil when |ref= is missing, when |ref=|... or when |ref=}}
		if ref then
			ref = mw.text.trim (ref);
			if '' == ref then
				ref = nil;														-- only white space (|ref= at the end of a line); same as empty
			end
		end

		if not ref then															-- here when |ref= missing or empty
			if redirects_citation[template_name] then							-- could be cs2
				if template:match ('|%s*mode%s*=%s*cs1') then
					return nil;													-- citation template but |mode=cs1
				end
				ref = 'harv';													-- spoof to handle cs2 as if it were cs1 with |ref=harv
			elseif template:match ('|%s*mode%s*=%s*cs2') then
				ref = 'harv';													-- not a cs2 template but |mode=cs2; spoof as if it were cs1 with |ref=harv
			end
		end
	end

	local params = {};															-- table of the template's parameters
	template_params_get (template, params);

	if not ref then																-- |ref= not set; might be cite LSA or harvc
		if 'cite lsa' == template_name then
			-- cite LSA always creates CITEREF using only |last= and |year= (no aliases); a missing parameter contributes
			-- nothing instead of raising an error that would break the whole module
			-- NOTE(review): assumes the template renders a missing parameter as empty - confirm against {{cite LSA}}
			return 'CITEREF' .. (params.last or '') .. (params.year or '');
		end

		if is_harvc then
			if params.id then
				return params.id;												-- |id= value as written
			end
			local names = names_get (params, aliases_harvc_author);				-- get the harvc contributor names
			return names and ('CITEREF' .. names .. date) or nil;
		end

		return nil;																-- not cite LSA or harvc so done
	end

	if 'harv' == ref then														-- |ref=harv
		local names;
		if is_patent then
			names = names_get (params, aliases_inventor);						-- inventor names only
		else																	-- cs1|2 template
			names = names_get (params, aliases_contributor) or					-- contributor, author, or editor names in that order
				names_get (params, aliases_author) or
				vnames_get (params, 'vauthors') or								-- |vauthors=
				names_get (params, aliases_editor) or
				vnames_get (params, 'veditors');								-- |veditors=
		end
		return names and ('CITEREF' .. names .. date) or nil;					-- nil when there are no names
	end

	if ref:match ('%b{}') then													-- ref holds a template
		return sfnref_get (ref);												-- content of {{sfnref}} or {{harvid}}; nil else
	end

	if 'none' == ref and not is_patent then										-- |ref=none; not supported by cite patent
		return nil;																-- CITEREF explicitly suppressed
	end

	return ref;																	-- |ref=<text>; may match CITEREF override value in {{harv}} |ref= or {{harvc}} |id=
end
--[[--------------------------< A D D _ C I T E R E F >--------------------------------------------------------

Record <citeref> in the <citerefs> table.  The anchor is encoded with mw.uri.anchorEncode() and used as the key; the
value is 1 the first time that the anchor is recorded and 2 every time after that (more than one citation makes the
same anchor).  Nothing is recorded when <citeref> is nil or false.

No return value.

]]

local function add_citeref (citeref, citerefs)
	if not citeref then
		return;																	-- nothing was extracted so nothing to record
	end

	local anchor = mw.uri.anchorEncode (citeref);								-- encode to remove wikimarkup, convert spaces to underscores etc
	citerefs[anchor] = citerefs[anchor] and 2 or 1;								-- 1: first time seen; 2: multiple same name/date citations
end
--[[--------------------------< E A C H _ T E M P L A T E >----------------------------------------------------

Private helper for citeref_list_make().  Finds every place in <content> where the lua pattern <pattern> matches,
fetches the brace-balanced text found at or after that place, and calls <handler> with that text.  The search for
the next match resumes at the end of the previous pattern match.

]]

local function each_template (content, pattern, handler)
	local tstart, tend = content:find (pattern);								-- find the first template
	while tstart do																-- nil when no (more) templates found
		local template = content:match ('%b{}', tstart);						-- get the whole template
		if template then														-- nil when the braces are not balanced
			handler (template);
		end
		tstart, tend = content:find (pattern, tend);							-- search for another
	end
end


--[[--------------------------< C I T E R E F _ L I S T _ M A K E >--------------------------------------------

Makes a list of CITEREF anchors from the cs1|2, cs1|2-like, vcite xxx, harvc, and wikicite templates found in the
current page's wikitext.  Templates inside <nowiki>...</nowiki> tags and everything inside html comments are ignored.

Because cs1|2 wrapper templates can, and often do, hide |ref=, the author and date parameters inside the wrapper,
these parameters are not available in the article's wikisource so {{harv}}, {{sfn}}, and {{harvc}} templates that
link correctly to those wrapper templates will incorrectly show error messages.  Use |ignore-err=yes in the {{harv}},
{{sfn}}, and {{harvc}} templates to suppress the error message.

Returns the module-level citerefs table (key: encoded anchor; value: 1 when the anchor was found once, 2 when found
more than once); returns the empty string when the page has no content.

]]

local function citeref_list_make ()
	local article_content = mw.title.getCurrentTitle():getContent() or '';		-- get the content of the article or ''; new pages edited w/ve do not have 'content' until saved; ve does not preview; phab:T221625
	article_content = article_content:gsub ('<nowiki>%s*{{.-}}%s*</nowiki>', '');	-- remove templates inside nowiki tags
	article_content = article_content:gsub ('<!%-%-.-%-%->', '');				-- remove html comments and their content

	if '' == article_content then												-- when there is no article content
		return '';																-- no point in continuing
	end

	local function cs_handler (template)										-- cs1|2, cs1|2-like, and harvc templates
		add_citeref (citeref_make (template), citerefs);
	end

	local function vcite_handler (template)										-- vcite family templates
		local ref = template:match ('|%s*ref%s*=%s*(%b{})');					-- first look for |ref={{sfnref}} or |ref={{harvid}} because templates will be stripped from the vcite template
		if ref then
			add_citeref (sfnref_get (ref), citerefs);							-- content of {{sfnref}} or {{harvid}}; nil else
			return;
		end

		local params = {};
		template_params_get (template, params);									-- build a table of template parameters and their values
		local citeref = params['ref'];											-- when both set, vcite uses value from |ref=
		if not citeref and params['harvid'] then
			citeref = 'CITEREF' .. params['harvid'];							-- in vcite, |harvid= auto-adds 'CITEREF' prefix to the value in |harvid=
		end
		add_citeref (citeref, citerefs);
	end

	local function wikicite_handler (template)									-- {{wikicite}} templates
		local citeref;															-- local so that an anchor left over from some earlier template is never added a second time
		local ref = template:match ('|%s*ref%s*=%s*(%b{})');					-- first look for |ref={{sfnref}} or |ref={{harvid}}
		if ref then
			citeref = sfnref_get (ref);
		else
			citeref = template:match ('|%s*ref%s*=([^|}]+)')					-- plain-text |ref=
				or template:match ('|%s*id%s*=%s*(%b{})');						-- |id={{template}}; used as written
			if not citeref then
				local id = template:match ('|%s*id%s*=([^|}]+)');				-- plain-text |id=
				if id then
					citeref = 'Reference-' .. id;
				end
			end
		end
		add_citeref (citeref, citerefs);										-- does nothing when citeref is nil
	end

	each_template (article_content, '{{%s*[Cc]it[ae]', cs_handler);				-- cs1|2 and cs1|2-like templates

	for _, pattern in ipairs (redirect_patterns_harvc) do						-- harvc and its redirects
		each_template (article_content, pattern, cs_handler);
	end

	for _, pattern in ipairs (redirect_patterns_vcite) do						-- for each of the vcite family template base patterns
		each_template (article_content, pattern, vcite_handler);
	end

	each_template (article_content, '{{%s*[Ww]ikicite', wikicite_handler);

	mw.log (mw.dumpObject (citerefs))
	return citerefs;
end
--[[--------------------------< E X P O R T E D _ T A B L E S >------------------------------------------------
The module's return value.  citeref_list_make() is called once, when this module is loaded, and its result is
exported as citerefs: a table whose keys are encoded anchors and whose values are 1 (anchor found once) or 2 (anchor
found more than once).  When the page has no content, citeref_list_make() returns the empty string instead of a table.
]]
return {
citerefs = citeref_list_make (), -- table of CITEREFs available in this article
}