(* Get Wikipedia articles *)
(* Fallback rules: any string key that has no more specific value assigned
   evaluates to the empty string. Non-string arguments stay unevaluated. *)
wikiEn[_String] := "";
wikiRu[_String] := "";
(* maintainWiki
   Walks namespic twice - first for the English, then for the Russian
   Wikipedia - and for every name:
     1. loads the article page, downloading it through urlsave only when no
        cached .m file exists under dirdata/wiki/<lang>/;
     2. extracts the page title and records redirecten / redirectru when the
        title differs from name;
     3. splits the page text into cleaned, non-empty paragraphs and stores
        them in wikitexten[name] / wikitextru[name];
     4. requests the intro extract from the MediaWiki API into a .json file
        and stores its paragraphs in wikitext1en[name] / wikitext1ru[name];
     5. at the end of the Russian pass, sets wikiEn[name] / wikiRu[name] to
        the first extract paragraph when one is available.
   Every working variable is global (there is no Module/Block). The helpers
   checkdir1, urlsave, htmlname, removetags, stringtrim and nPrint, and the
   values dirdata, dirtemp and namespic, are defined outside this block.
   NOTE(review): several string patterns below (empty "" literals and strings
   broken across two lines) look as if HTML markup was stripped from them;
   they are kept verbatim here - verify against a pristine copy of the file. *)
maintainWiki := (
(* Predicate (string longer than 10 characters) handed to every urlsave call;
   presumably urlsave uses it to validate the downloaded text - confirm. *)
textcondit = ((StringLength[#] > 10) &);
(* English wiki *)
lang = "en";
dirwiki = ToFileName[{dirdata, "wiki", lang}];
checkdir1[dirwiki];
Do[
title = "";
(* Article URL: spaces in the name become underscores. *)
name1 = StringReplace[name, " " -> "_"];
url = "http://" <> lang <> ".wikipedia.org/wiki/" <> name1;
file = ToFileName[dirwiki, htmlname[name] <> ".m"];
(* Download only when there is no cached copy; Save writes the definitions
   of response and text, which Get restores below. *)
If[FileType[file] =!= File,
filetemp = ToFileName[dirtemp, "wikilookup-" <> lang <> ".htm"];
If[FileType[filetemp] === File, DeleteFile[filetemp]];
{response, text} = urlsave[url, filetemp, textcondit];
Save[file, {response, text}];
];
Get[file];
(* response[[2]] is compared with 200 - presumably the HTTP status code. *)
If[response[[2]] != 200,
nPrint["Warning: URL = ", url, ": bad response: ", response[[2]]],
title =
StringCases[text, "
"][[1]] // removetags;
If[Head[title] =!= String,
Print["Error: Head[title]=!=String: ", name, " ", lang],
If[title =!= name,
Print["Redirecting: ", name, " -> ", title];
redirecten[name] = title;
];
filedata = ToFileName[dirwiki, htmlname[title] <> ".json"];
(* Paragraphs scraped from the article page itself. *)
text1 =
StringReplace[text,
RegularExpression["(?i)(?s)
\\s*(.*?)\\s*"] ->
""];
content = StringCases[text1, ""];
content = Table[
par1 =
StringReplace[par,
RegularExpression["(?i)(?s)
\\s*(.*?)\\s*"] -> "$1"];
par1 =
StringReplace[par1,
RegularExpression["(?i)(?s)\\s*(.*?)\\s*"] ->
""];
par1 =
StringReplace[par1,
RegularExpression["(?i)(?s)(.*?)"] -> "$1"];
(* The same replacement is applied five times in a row. *)
Do[
par1 = StringReplace[par1,
RegularExpression["(?i)(?s)(.*?)"] ->
"$1"], {5}];
par1 = stringtrim[par1];
par1, {par, content}];
content = Select[content, # =!= "" &];
If[content === {},
Print["Error wiki: content==={}: ", name, " ", lang],
wikitexten[name] = content];
namea = StringReplace[name, " " -> "_"];
title1 = StringReplace[title, " " -> "%20"];
name1a = StringReplace[name1, " " -> "_"];
(* Intro extract from the MediaWiki API, written to filedata by urlsave
   (presumably - urlsave is defined elsewhere). *)
url =
"https://" <> lang <>
".wikipedia.org/w/api.php?action=query&prop=extracts&exintro&titles=" <> title1 <> "&format=json";
urlsave[url, filedata, textcondit];
(* Better to use this, if it is defined *)
If[FileType[filedata] === File,
a = Import[filedata];
(* Proceed only when the reply contains no "missing" marker. *)
If[Cases[a, "missing", Infinity, 1] === {},
b = Cases[a, Rule["extract", b_] :> b, Infinity, 1][[1]];
If[Head[b] =!= String,
Print["Internal error: file ", filedata,
" incorrectly parsed. URL: ", url, " File content: ", a],
(* The extract is a redirect stub: report it and try to work out the
   target name from the live page so a redirect rule can be added. *)
If[! StringFreeQ[b, "This is a redirect"],
url1 = "http://" <> lang <> ".wikipedia.org/wiki/" <> name1a;
(* NOTE(review): namer is not assigned anywhere in this block -
   presumably a global, or a typo for name; confirm. *)
Print["Warning: Wikipedia: add a new redirect: ", name,
": see ", Hyperlink["Wikipedia article: " <> namer, url1]];
filet = ToFileName[dirtemp, "wikiarticle.htm"];
resp = URLSave[url1, filet, {"Headers", "StatusCode"}];
If[resp[[2]] == 200, txt = Import[filet, "Text"];
txt = StringCases[txt, "" ~~ x__ ~~ "" -> x, 1];
If[MatchQ[txt, {_}],
newname = StringTrim[StringSplit[txt[[1]], " - "][[1]]];
Print[" - add new line to maintain-wiki.m: \"", name,
"\" -> \"", newname, "\""];]];
Print["File ", filedata, " should be deleted!"];
];
(* Split the extract into paragraphs and clean each one. *)
content =
StringSplit[
b, {WhitespaceCharacter___ ~~ "" ~~
WhitespaceCharacter___ ~~ "" ~~ WhitespaceCharacter}];
content = Table[
par1 = StringReplace[par, {"
" -> "", "
" -> ""}];
par1 = stringtrim[par1];
par1, {par, content}];
content = Select[content, # =!= "" &];
(* Store the API-extract paragraphs themselves, mirroring the Russian
   pass; the scraped paragraphs stay in wikitexten. *)
If[content === {},
Print["Error: content==={}: ", name, " ", lang],
wikitext1en[name] = content];
]],
Print["Warning: file ", filedata, " not found."];
];
]; (* If[Head[title] =!= String, *)
]; (* If[response[[2]] != 200, *)
, {name, namespic}];
(* Russian wiki *)
lang = "ru";
dirwiki = ToFileName[{dirdata, "wiki", lang}];
checkdir1[dirwiki];
Do[
(* Article URL: spaces in the name become underscores. *)
name1 = StringReplace[name, " " -> "_"];
url = "http://" <> lang <> ".wikipedia.org/wiki/" <> name1;
file = ToFileName[dirwiki, htmlname[name] <> ".m"];
(* Download only when there is no cached copy; Save writes the definitions
   of response and text, which Get restores below. *)
If[FileType[file] =!= File,
filetemp = ToFileName[dirtemp, "wikilookup-" <> lang <> ".htm"];
If[FileType[filetemp] === File, DeleteFile[filetemp]];
{response, text} = urlsave[url, filetemp, textcondit];
Save[file, {response, text}];
];
Get[file];
If[response[[2]] != 200,
nPrint["Warning: URL = ", url, ": bad response: ", response[[2]]],
title =
StringCases[text, ""][[1]] // removetags;
If[Head[title] =!= String,
Print["Error: Head[title]=!=String: ", name, " ", lang],
If[title =!= name,
nPrint["Redirecting: ", name, " -> ", title];
redirectru[name] = title;
];
filedata = ToFileName[dirwiki, htmlname[title] <> ".json"];
(* Paragraphs scraped from the article page itself. *)
text1 =
StringReplace[text,
RegularExpression["(?i)(?s)
\\s*(.*?)\\s*"] ->
""];
content = StringCases[text1, ""];
content = Table[
par1 =
StringReplace[par,
RegularExpression["(?i)(?s)
\\s*(.*?)\\s*"] -> "$1"];
par1 =
StringReplace[par1,
RegularExpression["(?i)(?s)\\s*(.*?)\\s*"] ->
""];
par1 =
StringReplace[par1,
RegularExpression["(?i)(?s)(.*?)"] -> "$1"];
(* The same replacement is applied five times in a row. *)
Do[
par1 = StringReplace[par1,
RegularExpression["(?i)(?s)(.*?)"] ->
"$1"], {5}];
par1 = stringtrim[par1];
par1, {par, content}];
content = Select[content, # =!= "" &];
(* Keep the scraped paragraphs in wikitextru, mirroring wikitexten in the
   English pass; wikitext1ru is reserved for the API extract below. *)
If[content === {},
Print["Error: content==={}: ", name, " ", lang],
wikitextru[name] = content];
namea = StringReplace[name, " " -> "_"];
title1 = StringReplace[title, " " -> "%20"];
name1a = StringReplace[name1, " " -> "_"];
(* Intro extract from the MediaWiki API, written to filedata by urlsave
   (presumably - urlsave is defined elsewhere). *)
url =
"https://" <> lang <>
".wikipedia.org/w/api.php?action=query&prop=extracts&exintro&\
titles=" <> title1 <> "&format=json";
urlsave[url, filedata, textcondit];
(* Better to use this, if it is defined *)
If[FileType[filedata] === File,
a = Import[filedata];
(* Proceed only when the reply contains no "missing" marker. *)
If[Cases[a, "missing", Infinity, 1] === {},
b = Cases[a, Rule["extract", b_] :> b, Infinity, 1][[1]];
If[Head[b] =!= String,
Print["Internal error: file ", filedata,
" incorrectly parsed. URL: ", url, " File content: ", a],
(* NOTE(review): the phrase tested here is English; Russian extracts
   presumably never contain it, so this branch may be dead - confirm. *)
If[! StringFreeQ[b, "This is a redirect"],
(* Look the article up on the wiki of the current language. *)
url1 = "http://" <> lang <> ".wikipedia.org/wiki/" <> name1a;
(* NOTE(review): namer is not assigned anywhere in this block -
   presumably a global, or a typo for name; confirm. *)
Print["Warning: Wikipedia: add a new redirect: ", name,
": see ", Hyperlink["Wikipedia article: " <> namer, url1]];
filet = ToFileName[dirtemp, "wikiarticle.htm"];
resp = URLSave[url1, filet, {"Headers", "StatusCode"}];
If[resp[[2]] == 200, txt = Import[filet, "Text"];
txt = StringCases[txt, "" ~~ x__ ~~ "" -> x, 1];
If[MatchQ[txt, {_}],
newname = StringTrim[StringSplit[txt[[1]], " - "][[1]]];
Print[" - add new line to maintain-wiki.m: \"", name,
"\" -> \"", newname, "\""];]];
Print["File ", filedata, " should be deleted!"];
];
(* Split the extract into paragraphs and clean each one. *)
content =
StringSplit[
b, {WhitespaceCharacter___ ~~ "" ~~
WhitespaceCharacter___ ~~ "" ~~ WhitespaceCharacter}];
content = Table[
par1 = StringReplace[par, {"
" -> "", "
" -> ""}];
par1 = stringtrim[par1];
par1, {par, content}];
content = Select[content, # =!= "" &];
If[content === {},
Print["Error: content==={}: ", name, " ", lang],
wikitext1ru[name] = content];
]],
Print["Warning: file ", filedata, " not found."];
];
]; (* If[Head[title] =!= String *)
]; (* If[response[[2]] != 200 *)
(* First paragraph of Wiki *)
(* This runs inside the Russian loop; the English loop has already finished,
   so wikitext1en[name] holds whatever that pass stored for this name. *)
wikipar = wikitext1en[name];
If[Head[wikipar] === List && wikipar =!= {}, wikiEn[name] = wikipar[[1]]];
wikipar = wikitext1ru[name];
If[Head[wikipar] === List && wikipar =!= {},
wikiRu[name] = StringReplace[wikipar[[1]], "" -> "... "];
];
, {name, namespic}];
);