Skip to content

Commit f295217

Browse files
committed
bug fixes - now exports valid DWCa without synonyms
Various bug fixes associated with quoting weird names and correctly formatting the XML files (I had introduced some typos in the hard-coded section).
1 parent 33c76d2 commit f295217

File tree

1 file changed

+25
-5
lines changed

1 file changed

+25
-5
lines changed

tools/taxonomy-parser.cpp

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -288,8 +288,8 @@ std::function<bool(tax_flags)> get_flags_match(variables_map& args) {
288288
}
289289

290290
void write_eml_xml(const std::string ofn) {
291-
string content = "<?xml version=\"1.0\"?>"
292-
"<eml:eml xmlns:eml=\"eml://ecoinformatics.org/eml-2.1.1\" xmlns:md=\"eml://ecoinformatics.org/methods-2.1.1\" xmlns:proj=\"eml://ecoinformatics.org/project-2.1.1\" xmlns:d\"=\"eml://ecoinformatics.org/dataset-2.1.1\" xmlns:res=\"eml://ecoinformatics.org/resource-2.1.1\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:xsi=\"http://www.w3.org/\"2001/XMLSchema-instance\" packageId=\"/2020-5-30::0:53:12\" system=\"http://globalnames.org\" xml:lang=\"en\" xsi:schemaLocation=\"eml://ecoinformatics.org/eml-2.1.1 \"http://rs.gbif.org/schema/eml-gbif-profile/1.0.1/eml.xsd\">\n"
291+
string content = "<?xml version=\"1.0\"?>\n"
292+
"<eml:eml xmlns:eml=\"eml://ecoinformatics.org/eml-2.1.1\" xmlns:md=\"eml://ecoinformatics.org/methods-2.1.1\" xmlns:proj=\"eml://ecoinformatics.org/project-2.1.1\" xmlns:d=\"eml://ecoinformatics.org/dataset-2.1.1\" xmlns:res=\"eml://ecoinformatics.org/resource-2.1.1\" xmlns:dc=\"http://purl.org/dc/terms/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" packageId=\"/2020-5-30::0:53:12\" system=\"http://globalnames.org\" xml:lang=\"en\" xsi:schemaLocation=\"eml://ecoinformatics.org/eml-2.1.1 http://rs.gbif.org/schema/eml-gbif-profile/1.0.1/eml.xsd\">\n"
293293
" <dataset id=\"\">\n"
294294
" <title>Open Tree of Life Taxonomy</title>\n"
295295
" <license/>\n"
@@ -346,14 +346,34 @@ void write_meta_xml(const std::string ofn) {
346346

347347
// Global table associating each TaxonomicRank with an int.
// NOTE(review): where it is populated and what the int encodes is not visible in this excerpt — confirm.
std::unordered_map<TaxonomicRank, int> g_rank2num;
348348
// Separator character for DwC-A output; escape_for_dwca() quotes any value that contains it.
const char dwca_sep = ',';
349-
string escape_for_dwca(const string & ins) {
350-
if (ins.find(dwca_sep) == string::npos) {
349+
350+
/// Returns a copy of `ins` in which every double-quote character is doubled
/// (`"` becomes `""`). All other characters are copied through unchanged.
///
/// The result is NOT wrapped in enclosing quotes; the caller adds those.
///
/// @param ins  text to escape; may be empty.
/// @return     the escaped copy (identical to `ins` when it contains no `"`).
inline std::string escape_double_quotes(const std::string & ins) {
    std::string escaped;
    // Capacity hint only: room for the input plus a couple of doubled quotes.
    // The length is kept as size_t rather than narrowed to `unsigned`.
    escaped.reserve(ins.length() + 4);
    for (const char c : ins) {
        if (c == '\"') {
            // Emit the extra quote first; the character itself follows below.
            escaped.push_back('\"');
        }
        escaped.push_back(c);
    }
    return escaped;
}
363+
364+
inline string escape_for_dwca(const string & ins) {
365+
if (ins.find(dwca_sep) == string::npos
366+
&& ins.find('\"') == string::npos) {
351367
return ins;
352368
}
353369
string estr;
354370
estr.reserve(2 + ins.length());
355371
estr.append(1, '\"');
356-
estr.append(ins);
372+
if (ins.find('\"') == string::npos) {
373+
estr.append(ins);
374+
} else {
375+
estr.append(escape_double_quotes(ins));
376+
}
357377
estr.append(1, '\"');
358378
return estr;
359379
}

0 commit comments

Comments
 (0)