1+ from __future__ import annotations
2+
3+ import pathlib
4+ from dataclasses import dataclass , field
5+ from typing import Optional , Union
6+
7+ from rdflib import Graph , Namespace , URIRef , Literal , RDF , RDFS
8+
9+
10+ # ---------------------------------------------------------------------------
11+ # Namespace declarations
12+ # ---------------------------------------------------------------------------
13+
# Vocabulary namespaces the benchmark descriptions are written against.
M4I = Namespace("http://w3id.org/nfdi4ing/metadata4ing#")
MATHMOD = Namespace("https://mardi4nfdi.de/mathmoddb#")
OBO = Namespace("http://purl.obolibrary.org/obo/")

# --- Predicates -------------------------------------------------------------
# Value-carrying properties of parameters and variables.
HAS_NUMERICAL_VALUE = M4I.hasNumericalValue
HAS_STRING_VALUE = M4I.hasStringValue
HAS_UNIT = M4I.hasUnit
HAS_KIND_OF_QTY = M4I.hasKindOfQuantity

# Structural links between nodes.
HAS_PART = OBO.BFO_0000051
USES_CONFIG = M4I.usesConfiguration
HAS_EMPLOYED_TOOL = M4I.hasEmployedTool
INVESTIGATES = M4I.investigates
EVALUATES = M4I.evaluates

# MathModDB terms, derived from the MATHMOD namespace so the IRI prefix is
# spelled out in exactly one place.
USES = MATHMOD.uses
DESCRIBED_BY = MATHMOD.describedAsDocumentedBy

# --- RDF types --------------------------------------------------------------
T_BENCHMARK = M4I.Benchmark
T_PARAMETER_SET = M4I.ParameterSet
T_NUMERICAL_VARIABLE = M4I.NumericalVariable
T_PROCESSING_STEP = M4I.ProcessingStep
34+
35+
36+ # ---------------------------------------------------------------------------
37+ # Domain Classes
38+ # ---------------------------------------------------------------------------
39+
@dataclass
class KGNode:
    """Common base for every record extracted from the knowledge graph."""

    # Identifier of the node; the loader fills this with the string form of
    # the node it was built from.
    id: str
    # Human-readable label, or None when none was supplied.
    label: Optional[str] = None
44+
45+
@dataclass
class ResearchProblem(KGNode):
    """Node a benchmark points to through ``m4i:investigates``.

    Carries nothing beyond the ``id`` and ``label`` inherited from ``KGNode``.
    """
49+
50+
@dataclass
class MathematicalModel(KGNode):
    """Node a benchmark points to through the MathModDB ``uses`` property.

    Carries nothing beyond the ``id`` and ``label`` inherited from ``KGNode``.
    """
54+
55+
@dataclass
class Publication(KGNode):
    """Node a benchmark points to through ``describedAsDocumentedBy``.

    Carries nothing beyond the ``id`` and ``label`` inherited from ``KGNode``.
    """
59+
60+
@dataclass
class NumericalVariable(KGNode):
    """A variable node described by a unit and a kind of quantity."""

    # Value of ``m4i:hasUnit`` on the node, if any.
    unit: Optional[str] = None
    # Value of ``m4i:hasKindOfQuantity`` on the node, if any.
    quantity_kind: Optional[str] = None
65+
66+
@dataclass
class NumericalParameter(KGNode):
    """A parameter node holding a numeric value and, optionally, its unit."""

    # Value of ``m4i:hasNumericalValue`` on the node, if any.
    numerical_value: Optional[float] = None
    # Value of ``m4i:hasUnit`` on the node, if any.
    unit: Optional[str] = None
71+
72+
@dataclass
class TextParameter(KGNode):
    """A parameter node whose value is textual."""

    # Value of ``m4i:hasStringValue`` on the node, if any.
    string_value: Optional[str] = None
76+
77+
# Any of the record types that may appear as a member of a ParameterSet.
ParameterEntry = Union[
    NumericalParameter,
    TextParameter,
    NumericalVariable,
]
79+
80+
@dataclass
class ParameterSet(KGNode):
    """A named group of parameter / variable entries."""

    # Redeclared with the same type and default as ``KGNode.label``.
    label: Optional[str] = None
    # Members of the set; the loader collects them from the node's
    # ``BFO_0000051`` links.
    parts: list[ParameterEntry] = field(default_factory=list)
85+
86+
@dataclass
class Tool(KGNode):
    """Node a processing step points to through ``m4i:hasEmployedTool``.

    Carries nothing beyond the ``id`` and ``label`` inherited from ``KGNode``.
    """
90+
91+
@dataclass
class ProcessingStep(KGNode):
    """A processing step together with its configurations and tools."""

    # Parameter sets linked through ``m4i:usesConfiguration``.
    configurations: list[ParameterSet] = field(default_factory=list)
    # Tools linked through ``m4i:hasEmployedTool``.
    employed_tools: list[Tool] = field(default_factory=list)
96+
97+
@dataclass
class BenchmarkSemantic(KGNode):
    """Aggregate view of one benchmark node and what it links to."""

    # Object of ``m4i:investigates``, if present.
    investigates: Optional[ResearchProblem] = None
    # Object of the MathModDB ``uses`` property, if present.
    uses: Optional[MathematicalModel] = None
    # Objects of ``m4i:evaluates``.
    evaluates: list[NumericalVariable] = field(default_factory=list)
    # Objects of ``m4i:hasParameterSet``.
    parameter_sets: list[ParameterSet] = field(default_factory=list)
    # Object of the MathModDB ``describedAsDocumentedBy`` property, if present.
    described_by: Optional[Publication] = None
    # Processing steps the loader found in the graph.
    processing_steps: list[ProcessingStep] = field(default_factory=list)
106+
107+
108+ # ---------------------------------------------------------------------------
109+ # Loader Class (NEW)
110+ # ---------------------------------------------------------------------------
111+
class BenchmarkLoader:
    """Read a JSON-LD benchmark description into the domain dataclasses.

    The document is parsed once, in the constructor, into an rdflib ``Graph``
    stored on ``self.graph``. :meth:`load` then walks that graph and returns a
    ``BenchmarkSemantic``; the ``build_*`` methods convert individual nodes.
    """

    def __init__(self, jsonld_path: str | pathlib.Path):
        """Parse *jsonld_path* as JSON-LD into ``self.graph``.

        Args:
            jsonld_path: Location of the JSON-LD document.

        Raises:
            FileNotFoundError: If *jsonld_path* does not exist.
        """
        self.path = pathlib.Path(jsonld_path)

        if not self.path.exists():
            raise FileNotFoundError(f"File not found: {self.path}")

        self.graph = Graph()
        self.graph.parse(str(self.path), format="json-ld")

    # ------------------------------------------------------------------
    # Low-level accessors
    # ------------------------------------------------------------------

    def _str(self, uri: URIRef) -> str:
        """Return the string form of a graph node."""
        return str(uri)

    def _label(self, subject: URIRef) -> Optional[str]:
        """Return the ``rdfs:label`` of *subject*, or ``None`` if it has none."""
        val = self.graph.value(subject, RDFS.label)
        # Explicit None check: a present-but-falsy literal (e.g. an empty
        # label) is still a label and must not be reported as missing.
        return str(val) if val is not None else None

    def _scalar(self, subject: URIRef, predicate: URIRef):
        """Return one value of *predicate* on *subject* as a plain object.

        Literals are converted with ``Literal.toPython()``; any other node is
        returned as its string form. ``None`` means the graph has no value.
        """
        val = self.graph.value(subject, predicate)
        if val is None:
            return None
        return val.toPython() if isinstance(val, Literal) else str(val)

    def _labelled_node(self, node_cls, uri):
        """Build *node_cls* from the id and label of *uri*; ``None`` passes through."""
        if uri is None:
            return None
        return node_cls(id=self._str(uri), label=self._label(uri))

    # ------------------------------------------------------------------
    # Node builders
    # ------------------------------------------------------------------

    def build_numerical_parameter(self, uri: URIRef) -> NumericalParameter:
        """Build a ``NumericalParameter`` from the node *uri*."""
        return NumericalParameter(
            id=self._str(uri),
            label=self._label(uri),
            numerical_value=self._scalar(uri, HAS_NUMERICAL_VALUE),
            unit=self._scalar(uri, HAS_UNIT),
        )

    def build_text_parameter(self, uri: URIRef) -> TextParameter:
        """Build a ``TextParameter`` from the node *uri*."""
        return TextParameter(
            id=self._str(uri),
            label=self._label(uri),
            string_value=self._scalar(uri, HAS_STRING_VALUE),
        )

    def build_numerical_variable(self, uri: URIRef) -> NumericalVariable:
        """Build a ``NumericalVariable`` from the node *uri*."""
        return NumericalVariable(
            id=self._str(uri),
            label=self._label(uri),
            unit=self._scalar(uri, HAS_UNIT),
            quantity_kind=self._scalar(uri, HAS_KIND_OF_QTY),
        )

    def build_parameter_entry(self, uri: URIRef) -> ParameterEntry:
        """Build the record type that matches the node *uri*.

        Precedence: a node carrying ``m4i:hasStringValue`` becomes a
        ``TextParameter``; otherwise a node typed ``m4i:NumericalVariable``
        becomes a ``NumericalVariable``; everything else is treated as a
        ``NumericalParameter``.
        """
        # Test for presence, not truthiness: an empty or otherwise falsy
        # string value still marks the node as a text parameter.
        if self.graph.value(uri, HAS_STRING_VALUE) is not None:
            return self.build_text_parameter(uri)
        if (uri, RDF.type, T_NUMERICAL_VARIABLE) in self.graph:
            return self.build_numerical_variable(uri)
        return self.build_numerical_parameter(uri)

    def build_parameter_set(self, uri: URIRef) -> ParameterSet:
        """Build a ``ParameterSet`` from *uri* and its ``HAS_PART`` members."""
        parts = [
            self.build_parameter_entry(part)
            for part in self.graph.objects(uri, HAS_PART)
        ]
        return ParameterSet(
            id=self._str(uri),
            label=self._label(uri),
            parts=parts,
        )

    def build_tool(self, uri: URIRef) -> Tool:
        """Build a ``Tool`` from the node *uri*."""
        return Tool(id=self._str(uri), label=self._label(uri))

    def build_processing_step(self, uri: URIRef) -> ProcessingStep:
        """Build a ``ProcessingStep`` with its configurations and tools."""
        configs = [
            self.build_parameter_set(c)
            for c in self.graph.objects(uri, USES_CONFIG)
        ]
        tools = [
            self.build_tool(t)
            for t in self.graph.objects(uri, HAS_EMPLOYED_TOOL)
        ]
        return ProcessingStep(
            id=self._str(uri),
            label=self._label(uri),
            configurations=configs,
            employed_tools=tools,
        )

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------

    def load(self) -> BenchmarkSemantic:
        """Assemble the ``BenchmarkSemantic`` for the benchmark in the graph.

        Returns:
            The benchmark node with its research problem, model, publication,
            evaluated variables, parameter sets and processing steps.

        Raises:
            ValueError: If the graph contains no node typed ``m4i:Benchmark``.
        """
        g = self.graph

        # NOTE(review): if the graph holds several benchmark nodes, only the
        # first one yielded by the graph is used.
        bm_uri = next(g.subjects(RDF.type, T_BENCHMARK), None)
        if bm_uri is None:
            raise ValueError("No m4i:Benchmark node found.")

        research_problem = self._labelled_node(
            ResearchProblem, g.value(bm_uri, INVESTIGATES)
        )
        math_model = self._labelled_node(
            MathematicalModel, g.value(bm_uri, USES)
        )
        publication = self._labelled_node(
            Publication, g.value(bm_uri, DESCRIBED_BY)
        )

        metrics = [
            self.build_numerical_variable(m)
            for m in g.objects(bm_uri, EVALUATES)
        ]

        param_sets = [
            self.build_parameter_set(ps)
            for ps in g.objects(bm_uri, M4I.hasParameterSet)
        ]

        # NOTE(review): this collects every m4i:ProcessingStep in the graph,
        # not only steps linked to ``bm_uri`` - confirm that is intended.
        steps = [
            self.build_processing_step(s)
            for s in g.subjects(RDF.type, T_PROCESSING_STEP)
        ]

        return BenchmarkSemantic(
            id=self._str(bm_uri),
            label=self._label(bm_uri),
            investigates=research_problem,
            uses=math_model,
            evaluates=metrics,
            parameter_sets=param_sets,
            described_by=publication,
            processing_steps=steps,
        )
0 commit comments