(* Get Wikipedia articles.

   wikiEn[name] / wikiRu[name]
       Default to "" for any string name. maintainWiki adds specific
       definitions holding the first paragraph of the English / Russian
       article for each name it could process.

   maintainWiki
       Evaluating this symbol runs the whole procedure (it is a delayed
       definition on a bare symbol, not a function with arguments).
       For every name in namespic, and for each language ("en", then "ru"):
         1. Fetch the article page through urlsave, caching the resulting
            {response, text} pair in <dirdata>/wiki/<lang>/<htmlname[name]>.m
            with Save; later runs reload the cache with Get.
         2. Extract the title, record redirecten / redirectru when the title
            differs from the name, and extract paragraphs from the page text.
         3. Fetch the intro extract from the MediaWiki API into a .json file
            and split it into paragraphs.
       Finally wikiEn[name] and wikiRu[name] are set from the first stored
       paragraph.

   All working variables are deliberately left global. In particular
   response and text MUST stay global symbols: Save writes the definitions
   of those symbols into the cache file and Get restores them by name.

   Helpers and data used but defined elsewhere: urlsave, htmlname, checkdir1,
   removetags, stringtrim, nPrint, dirdata, dirtemp, namespic.

   NOTE(review): several string and regular-expression patterns below are
   empty or whitespace-only (for example the pattern passed to StringCases
   when extracting the title). They look as if markup text has been lost
   from the literals; they are reproduced unchanged here and must be checked
   against the intended patterns.

   Some string literals contain raw line breaks. Their continuation lines
   start at column 0 on purpose; do not re-indent them, since that would
   change the string contents. *)
wikiEn[name_String] := "";
wikiRu[name_String] := "";

maintainWiki := (
    (* Predicate handed to urlsave: True for strings longer than 10 characters.
       Presumably used to decide whether downloaded text is acceptable; confirm
       against urlsave. *)
    textcondit = ((StringLength[#] > 10) &);

    (* ------------------------------ English wiki ------------------------------ *)
    lang = "en";
    dirwiki = ToFileName[{dirdata, "wiki", lang}];
    checkdir1[dirwiki];
    Do[
        title = "";
        name1 = StringReplace[name, " " -> "_"];
        url = "http://" <> lang <> ".wikipedia.org/wiki/" <> name1;
        file = ToFileName[dirwiki, htmlname[name] <> ".m"];

        (* Download only when there is no cache file for this name yet. *)
        If[FileType[file] =!= File,
            filetemp = ToFileName[dirtemp, "wikilookup-" <> lang <> ".htm"];
            If[FileType[filetemp] === File, DeleteFile[filetemp]];
            {response, text} = urlsave[url, filetemp, textcondit];
            Save[file, {response, text}];
        ];
        (* Restores the global symbols response and text from the cache. *)
        Get[file];

        (* The second element of response is compared with 200, presumably the
           HTTP status code; confirm against urlsave. *)
        If[response[[2]] != 200,
            nPrint["Warning: URL = ", url, ": bad response: ", response[[2]]],

            title = StringCases[text, ""][[1]] // removetags;
            If[Head[title] =!= String,
                Print["Error: Head[title]=!=String: ", name, " ", lang],

                If[title =!= name,
                    Print["Redirecting: ", name, " -> ", title];
                    redirecten[name] = title;
                ];
                filedata = ToFileName[dirwiki, htmlname[title] <> ".json"];

                (* Paragraph extraction from the downloaded page text. *)
                text1 = StringReplace[text, RegularExpression["(?i)(?s)\\s*(.*?)\\s*"] -> ""];
                content = StringCases[text1, ""];
                content = Table[
                    par1 = StringReplace[par, RegularExpression["(?i)(?s)\\s*(.*?)\\s*

"] -> "$1"];
                    par1 = StringReplace[par1, RegularExpression["(?i)(?s)\\s*(.*?)\\s*"] -> ""];
                    par1 = StringReplace[par1, RegularExpression["(?i)(?s)(.*?)"] -> "$1"];
                    (* Applied five times, presumably to unwrap nested markup. *)
                    Do[
                        par1 = StringReplace[par1, RegularExpression["(?i)(?s)(.*?)"] -> "$1"],
                        {5}];
                    par1 = stringtrim[par1];
                    par1,
                    {par, content}];
                content = Select[content, # =!= "" &];
                If[content === {},
                    Print["Error wiki: content==={}: ", name, " ", lang],
                    wikitexten[name] = content];

                namea = StringReplace[name, " " -> "_"];
                title1 = StringReplace[title, " " -> "%20"];
                name1a = StringReplace[name1, " " -> "_"];
                url = "https://" <> lang <> ".wikipedia.org/w/api.php?action=query&prop=extracts&exintro&titles=" <> title1 <> "&format=json";
                urlsave[url, filedata, textcondit];

                (* The API extract is preferred whenever its file exists. *)
                If[FileType[filedata] === File,
                    a = Import[filedata];
                    (* Proceed only when the reply contains no "missing" marker. *)
                    If[Cases[a, "missing", Infinity, 1] === {},
                        b = Cases[a, Rule["extract", b_] :> b, Infinity, 1][[1]];
                        If[Head[b] =!= String,
                            Print["Internal error: file ", filedata, " incorrectly parsed. URL: ", url, " File content: ", a],

                            (* The extract is a redirect stub: fetch the page itself and
                               suggest the mapping the maintainer should add by hand. *)
                            If[! StringFreeQ[b, "This is a redirect"],
                                url1 = "http://" <> lang <> ".wikipedia.org/wiki/" <> name1a;
                                (* The label uses name; the original referenced namer,
                                   which is never assigned in this procedure. *)
                                Print["Warning: Wikipedia: add a new redirect: ", name, ": see ",
                                    Hyperlink["Wikipedia article: " <> name, url1]];
                                filet = ToFileName[dirtemp, "wikiarticle.htm"];
                                resp = URLSave[url1, filet, {"Headers", "StatusCode"}];
                                If[resp[[2]] == 200,
                                    txt = Import[filet, "Text"];
                                    txt = StringCases[txt, "" ~~ x__ ~~ "" -> x, 1];
                                    If[MatchQ[txt, {_}],
                                        newname = StringTrim[StringSplit[txt[[1]], " - "][[1]]];
                                        Print[" - add new line to maintain-wiki.m: \"", name, "\" -> \"", newname, "\""];]];
                                Print["File ", filedata, " should be deleted!"];
                            ];

                            (* NOTE(review): WhitespaceCharacter followed by three
                               underscores parses as a pattern NAMED WhitespaceCharacter,
                               not as repeated whitespace; confirm the intended pattern. *)
                            content = StringSplit[b, {WhitespaceCharacter___ ~~ "

" ~~ WhitespaceCharacter___ ~~ "

" ~~ WhitespaceCharacter}];
                            content = Table[
                                par1 = StringReplace[par, {"

" -> "", "

" -> ""}];
                                par1 = stringtrim[par1];
                                par1,
                                {par, content}];
                            content = Select[content, # =!= "" &];
                            (* NOTE(review): the paragraphs computed from the API extract
                               are not stored here; the page-derived wikitexten value is
                               used instead, unlike the Russian loop below. Confirm that
                               this is intended. *)
                            If[content === {},
                                Print["Error: content==={}: ", name, " ", lang],
                                wikitext1en[name] = wikitexten[name]];
                        ]],
                    Print["Warning: file ", filedata, " not found."];
                ];
            ]; (* end of: title is a string *)
        ]; (* end of: response is 200 *)
        , {name, namespic}];

    (* ------------------------------ Russian wiki ------------------------------ *)
    lang = "ru";
    dirwiki = ToFileName[{dirdata, "wiki", lang}];
    checkdir1[dirwiki];
    Do[
        name1 = StringReplace[name, " " -> "_"];
        url = "http://" <> lang <> ".wikipedia.org/wiki/" <> name1;
        file = ToFileName[dirwiki, htmlname[name] <> ".m"];

        (* Download only when there is no cache file for this name yet. *)
        If[FileType[file] =!= File,
            filetemp = ToFileName[dirtemp, "wikilookup-" <> lang <> ".htm"];
            If[FileType[filetemp] === File, DeleteFile[filetemp]];
            {response, text} = urlsave[url, filetemp, textcondit];
            Save[file, {response, text}];
        ];
        (* Restores the global symbols response and text from the cache. *)
        Get[file];

        If[response[[2]] != 200,
            nPrint["Warning: URL = ", url, ": bad response: ", response[[2]]],

            title = StringCases[text, ""][[1]] // removetags;
            If[Head[title] =!= String,
                Print["Error: Head[title]=!=String: ", name, " ", lang],

                If[title =!= name,
                    nPrint["Redirecting: ", name, " -> ", title];
                    redirectru[name] = title;
                ];
                filedata = ToFileName[dirwiki, htmlname[title] <> ".json"];

                (* Paragraph extraction from the downloaded page text. *)
                text1 = StringReplace[text, RegularExpression["(?i)(?s)\\s*(.*?)\\s*"] -> ""];
                content = StringCases[text1, ""];
                content = Table[
                    par1 = StringReplace[par, RegularExpression["(?i)(?s)\\s*(.*?)\\s*

"] -> "$1"];
                    par1 = StringReplace[par1, RegularExpression["(?i)(?s)\\s*(.*?)\\s*"] -> ""];
                    par1 = StringReplace[par1, RegularExpression["(?i)(?s)(.*?)"] -> "$1"];
                    (* Applied five times, presumably to unwrap nested markup. *)
                    Do[
                        par1 = StringReplace[par1, RegularExpression["(?i)(?s)(.*?)"] -> "$1"],
                        {5}];
                    par1 = stringtrim[par1];
                    par1,
                    {par, content}];
                content = Select[content, # =!= "" &];
                (* NOTE(review): wikitextru is never assigned in this procedure, so
                   this stores an unevaluated expression; it is overwritten below
                   when the API extract yields paragraphs. Confirm the intent. *)
                If[content === {},
                    Print["Error: content==={}: ", name, " ", lang],
                    wikitext1ru[name] = wikitextru[name]];

                namea = StringReplace[name, " " -> "_"];
                title1 = StringReplace[title, " " -> "%20"];
                name1a = StringReplace[name1, " " -> "_"];
                (* The query string is written as one literal, identical in form to
                   the English loop; the original had a stray backslash-space left
                   over from a line continuation between "exintro&" and "titles=". *)
                url = "https://" <> lang <> ".wikipedia.org/w/api.php?action=query&prop=extracts&exintro&titles=" <> title1 <> "&format=json";
                urlsave[url, filedata, textcondit];

                (* The API extract is preferred whenever its file exists. *)
                If[FileType[filedata] === File,
                    a = Import[filedata];
                    (* Proceed only when the reply contains no "missing" marker. *)
                    If[Cases[a, "missing", Infinity, 1] === {},
                        b = Cases[a, Rule["extract", b_] :> b, Infinity, 1][[1]];
                        If[Head[b] =!= String,
                            Print["Internal error: file ", filedata, " incorrectly parsed. URL: ", url, " File content: ", a],

                            (* The extract is a redirect stub: fetch the page itself and
                               suggest the mapping the maintainer should add by hand.
                               NOTE(review): the marker text is English even in this
                               loop; confirm it can occur in Russian extracts. *)
                            If[! StringFreeQ[b, "This is a redirect"],
                                (* Built from lang so that the Russian page is checked;
                                   the original hard-coded the English host here. *)
                                url1 = "http://" <> lang <> ".wikipedia.org/wiki/" <> name1a;
                                (* The label uses name; the original referenced namer,
                                   which is never assigned in this procedure. *)
                                Print["Warning: Wikipedia: add a new redirect: ", name, ": see ",
                                    Hyperlink["Wikipedia article: " <> name, url1]];
                                filet = ToFileName[dirtemp, "wikiarticle.htm"];
                                resp = URLSave[url1, filet, {"Headers", "StatusCode"}];
                                If[resp[[2]] == 200,
                                    txt = Import[filet, "Text"];
                                    txt = StringCases[txt, "" ~~ x__ ~~ "" -> x, 1];
                                    If[MatchQ[txt, {_}],
                                        newname = StringTrim[StringSplit[txt[[1]], " - "][[1]]];
                                        Print[" - add new line to maintain-wiki.m: \"", name, "\" -> \"", newname, "\""];]];
                                Print["File ", filedata, " should be deleted!"];
                            ];

                            (* NOTE(review): WhitespaceCharacter followed by three
                               underscores parses as a pattern NAMED WhitespaceCharacter,
                               not as repeated whitespace; confirm the intended pattern. *)
                            content = StringSplit[b, {WhitespaceCharacter___ ~~ "

" ~~ WhitespaceCharacter___ ~~ "

" ~~ WhitespaceCharacter}];
                            content = Table[
                                par1 = StringReplace[par, {"

" -> "", "

" -> ""}];
                                par1 = stringtrim[par1];
                                par1,
                                {par, content}];
                            content = Select[content, # =!= "" &];
                            If[content === {},
                                Print["Error: content==={}: ", name, " ", lang],
                                wikitext1ru[name] = content];
                        ]],
                    Print["Warning: file ", filedata, " not found."];
                ];
            ]; (* end of: title is a string *)
        ]; (* end of: response is 200 *)

        (* First paragraph of each article. The Head test skips names for which
           no paragraph list was stored. *)
        wikipar = wikitext1en[name];
        If[Head[wikipar] === List && wikipar =!= {},
            wikiEn[name] = wikipar[[1]]];
        wikipar = wikitext1ru[name];
        If[Head[wikipar] === List && wikipar =!= {},
            (* Replaces the shortest span between the two delimiters with "... ". *)
            wikiRu[name] = StringReplace[wikipar[[1]], "
    " ~~ Shortest[___] ~~ "
" -> "... "];
        ];
        , {name, namespic}];
);