@@ -135,9 +135,9 @@ protected ParatextProjectTermsParserBase(ParatextProjectSettingsParserBase setti
135135 . Select ( kvp =>
136136 {
137137 string id = kvp . Item1 . Replace ( "\n " , "
" ) ;
138- string gloss = kvp . Item2 . Element ( "Renderings" ) . Value ;
139- IReadOnlyList < string > glosses = GetGlosses ( gloss ) ;
140- return ( id , glosses ) ;
138+ string rendering = kvp . Item2 . Element ( "Renderings" ) . Value ;
139+ IReadOnlyList < string > renderings = GetRenderings ( rendering ) ;
140+ return ( id , renderings ) ;
141141 } )
142142 . GroupBy ( kvp => kvp . Item1 , kvp => kvp . Item2 ) //Handle duplicate term ids (which do exist) e.g. שִׁלֵּמִי
143143 . Select ( grouping => ( grouping . Key , grouping . SelectMany ( g => g ) ) )
@@ -202,27 +202,39 @@ IDictionary<string, ImmutableHashSet<VerseRef>> termIdToReferences
202202 ) ;
203203 }
204204
/// <summary>
/// Normalizes a single term: trims it, passes it through <c>StripParens</c>, and collapses
/// every run of whitespace into a single space.
/// </summary>
/// <param name="term">The raw term text.</param>
/// <returns>The normalized term.</returns>
private static string CleanTerm(string term)
{
    term = term.Trim();
    // NOTE(review): StripParens is defined elsewhere; presumably it removes parenthesized segments - confirm.
    term = StripParens(term);
    // Split() with no arguments splits on whitespace but keeps empty entries, so consecutive
    // whitespace characters would otherwise survive as multiple spaces after the join.
    // Dropping the empty pieces collapses each run to one space and removes any leading or
    // trailing whitespace left behind by the previous step.
    term = string.Join(" ", term.Split().Where(part => part.Length > 0));
    return term;
}
212+
/// <summary>
/// Splits a raw gloss string into its individual glosses.
/// </summary>
/// <param name="gloss">The raw gloss text.</param>
/// <returns>
/// The glosses obtained by splitting on <c>;</c>, <c>,</c> and <c>/</c>, each trimmed, with empty
/// entries and duplicates removed.
/// </returns>
public static IReadOnlyList<string> GetGlosses(string gloss)
{
    // If the entire gloss is surrounded in square brackets, remove them.
    // NOTE(review): assumes capture group 1 of ContentInBracketsRegex is the text inside the brackets - confirm.
    Match match = ContentInBracketsRegex.Match(gloss);
    if (match.Success)
        gloss = match.Groups[1].Value;
    gloss = gloss.Replace("?", "");
    gloss = CleanTerm(gloss);
    gloss = StripParens(gloss, left: '[', right: ']');
    gloss = gloss.Trim();
    // string.Replace returns a new string (strings are immutable), so the result has to be
    // assigned back; otherwise the text matched by NumericalInformationRegex is never removed.
    // The match collection was created from the pre-loop string, so reassigning is safe here.
    foreach (Match m in NumericalInformationRegex.Matches(gloss))
    {
        gloss = gloss.Replace(m.Value, "");
    }
    return Regex.Split(gloss, @"[;,/]").Select(g => g.Trim()).Where(s => s != "").Distinct().ToList();
}
229+
/// <summary>
/// Splits a raw rendering string on the <c>||</c> separator and returns the cleaned renderings.
/// </summary>
/// <param name="rendering">The raw rendering text; alternatives are separated by <c>||</c>.</param>
/// <returns>
/// Each alternative after <c>CleanTerm</c> and removal of <c>*</c> characters, skipping any
/// that end up empty. Duplicates are kept.
/// </returns>
public static IReadOnlyList<string> GetRenderings(string rendering)
{
    var results = new List<string>();
    foreach (string part in Regex.Split(rendering.Trim(), @"\|\|"))
    {
        // Clean first, then drop the '*' characters, in the same order as before.
        string cleaned = CleanTerm(part).Replace("*", "");
        if (cleaned != "")
            results.Add(cleaned);
    }
    return results;
}
227239
228240 /// <summary>
0 commit comments