(* Reading mushroom data for IDs from saved files *)
(* maintainReadData
   Scans the directory <dirdata>/ids for files named "<id>.xml", extracts the
   naming and classification fields from each file and stores them in the
   global lookup idcomponents[field, id].  The fields written are:
     "name_html", "name", "genus", "species", "synonyms",
     "kingdom", "phylum", "class", "order", "family", "common names".
   Finally the whole idcomponents table is written to <dirdata>/components.m,
   replacing any previous copy.

   Reads the globals dirdata, noPrint and stringtrim, which are defined outside
   this block.  Working variables (filecomponents, dirids, files, id, dataxml,
   results, reps, name0, plantsynonyms*, msyn, cls, taxons, planttaxons, cnames)
   are deliberately left global, exactly as before, in case other code
   inspects them; only the helpers introduced here are Module-local.

   NOTE(review): SetDirectory[dirids] is never paired with ResetDirectory[], so
   the working directory stays at dirids after the call.  Left unchanged because
   callers may rely on it - confirm and add ResetDirectory[] if not. *)
maintainReadData := Module[{firstText, binomial, rankName},
  Print["Running maintainReadData ..."];
  filecomponents = ToFileName[dirdata, "components.m"];
  ClearAll[idcomponents];
  (* If[FileType[filecomponents] === File, Get[filecomponents]]; *)
  dirids = ToFileName[{dirdata, "ids"}];
  SetDirectory[dirids];
  files = FileNames[__ ~~ ".xml"];

  (* Rewrite rules that flatten extracted XML content into a plain string:
     unwrap singleton lists, join runs of strings with single spaces, and turn
     an inline element whose content is already a string back into
     "<tag>text</tag>" markup. *)
  reps = {
    {a_} :> a,
    {s___String} :> StringJoin[Riffle[{s}, " "]],
    XMLElement[x_String, y_, z_String] :> "<" <> x <> ">" <> z <> "</" <> x <> ">"};

  (* Flattened content of the first <tagName> element found anywhere in xmlExpr;
     yields "" when no such element exists. *)
  firstText[xmlExpr_, tagName_String] :=
    Cases[xmlExpr, XMLElement[tagName, _, a_] :> a, Infinity, 1] //. reps;

  (* Keep only the leading two words (with an optional " ×" after the first),
     dropping whatever follows them.  Strings that do not match are returned
     unchanged. *)
  binomial[nameStr_] :=
    StringReplace[nameStr, RegularExpression["(.*?( ×)? .*?) .*"] -> "$1"];

  (* Name of the first taxon in taxonList ({name, rank} pairs) with the given
     rank.  A missing rank is reported and yields "" instead of a Part error. *)
  rankName[taxonList_, rankLabel_String, srcFile_] :=
    Module[{hits = Select[taxonList, #[[2]] === rankLabel &]},
      If[hits === {},
        Print["error reading classification in file ", srcFile,
          " : no taxon of rank ", rankLabel];
        "",
        hits[[1, 1]]]];

  Do[
    id = StringDrop[file, -4]; (* file name without the ".xml" suffix *)
    If[Head[idcomponents["name", id]] =!= String,
      dataxml = Import[file, {"XML"}];
      results = Cases[dataxml, XMLElement["result", _, a_] :> a, Infinity];
      If[Length[results] != 1,
        Print["error reading file ", file, " : Length[results] != 1"]];
      (* Nothing usable in this file (also covers a failed Import): skip it
         rather than failing on results[[1]]. *)
      If[results === {}, Continue[]];
      dataxml = results[[1]];

      (* Primary name fields *)
      idcomponents["name_html", id] = firstText[dataxml, "name_html"];
      name0 = binomial[firstText[dataxml, "name"]];
      idcomponents["name", id] = name0;
      idcomponents["genus", id] = firstText[dataxml, "genus"];
      idcomponents["species", id] = firstText[dataxml, "species"];

      (* Synonyms: reduced the same way as the primary name, de-duplicated,
         and with the primary name itself removed. *)
      plantsynonyms0 = Cases[dataxml, XMLElement["synonym", _, a_] :> a, Infinity];
      msyn = Length[plantsynonyms0];
      plantsynonyms1 = firstText[#, "name_html"] & /@ plantsynonyms0;
      plantsynonyms2 = firstText[#, "name"] & /@ plantsynonyms0;
      noPrint["Synonyms (", msyn, "): ",
        {plantsynonyms1, plantsynonyms2} // Transpose // TableForm];
      (*Simplification*)
      plantsynonyms2 = stringtrim /@ plantsynonyms2;
      idcomponents["synonyms", id] =
        Complement[Union[binomial /@ plantsynonyms2], {name0}];

      (* Classification: collect {name, rank} for every <taxon> element. *)
      cls = Cases[dataxml, XMLElement["classification", _, a_] :> a, Infinity];
      If[Length[cls] != 1,
        Print["error reading classification in file ", file, " : Length[cls] != 1"]];
      cls = If[cls === {}, {}, cls[[1]]];
      taxons = Cases[cls, XMLElement["taxon", _, a_] :> a, Infinity];
      planttaxons = {firstText[#, "name"], firstText[#, "rank"]} & /@ taxons;
      (* "genus" is not taken from the taxon list: it is already set above from
         the <genus> element. *)
      Scan[
        (idcomponents[#[[1]], id] = rankName[planttaxons, #[[2]], file]) &,
        {{"kingdom", "Kingdom"}, {"phylum", "Phylum"}, {"class", "Class"},
         {"order", "Order"}, {"family", "Family"}}];
      noPrint[{{"Class", plantclass}, {"Family", plantfamily},
        {"Genus", plantgenus}} // TableForm];

      (* Common names: one flattened <name> per <common_name> element. *)
      cnames = Cases[dataxml, XMLElement["common_name", _, a_] :> a, Infinity];
      idcomponents["common names", id] = firstText[#, "name"] & /@ cnames;

      noPrint[id, ": ", idcomponents["kingdom", id], " > ",
        idcomponents["phylum", id], " > ", idcomponents["class", id], " > ",
        idcomponents["order", id], " > ", idcomponents["family", id], " > ",
        idcomponents["name", id], " ", idcomponents["common names", id]];
      0],
    {file, files}];

  (* Replace any previous dump; Save would otherwise append to it. *)
  If[FileType[filecomponents] === File, DeleteFile[filecomponents]];
  Save[filecomponents, idcomponents];
];