BioSchemas
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 1 deletion b/‎.gitignore‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎_includes/head.html‎
Lines changed: 10 additions & 0 deletions b/‎_includes/head.html‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎notebooks/BioschemasTypes.ipynb‎
Lines changed: 355 additions & 0 deletions b/‎notebooks/BioschemasTypes.ipynb‎
Lines changed: 355 additions & 0 deletions
diff --git a/‎notebooks/bioschemas_draft_types_temp.json‎
Lines changed: 1 addition & 0 deletions b/‎notebooks/bioschemas_draft_types_temp.json‎
Lines changed: 1 addition & 0 deletions
@@ -11,4 +11,5 @@ vendor
 .jekyll-metadata
 workspace.code-workspace
 /Gemfile
-/Gemfile.lock
+/Gemfile.lock
+.ipynb_checkpoints
@@ -35,4 +35,14 @@
   <script src="{{ 'assets/js/bootstrap.bundle.min.js' | relative_url }}"></script>
   <script src="{{ 'assets/js/toc.js' | relative_url }}"></script>
   <script defer src="{{'assets/js/all.min.js' | relative_url }}"></script>
+  <!-- Signposting -->
+  {% if page.signposting %}
+    {% for item in page.signposting %}
+      {% if item.rel == "type" %}
+      <link rel="{{item.rel}}" type="application/ld+json" href="{{item.link}}"> 
+      {% else %}
+      <link rel="{{item.rel}}" href="{{item.link}}"> 
+      {% endif %}
+    {% endfor %}
+  {% endif %}
 </head>
@@ -0,0 +1,355 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "9a444332-ef86-46cf-a110-3f8cf66c1c2b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Requirement already satisfied: rdflib in c:\\software\\iniforge3\\lib\\site-packages (7.1.3)\n",
+      "Requirement already satisfied: pyparsing<4,>=2.1.0 in c:\\software\\iniforge3\\lib\\site-packages (from rdflib) (3.2.1)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import sys\n",
+    "!{sys.executable} -m pip install rdflib"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "06db8876-5d45-4cf2-9251-4cf2a83287fc",
+   "metadata": {},
+   "source": [
+    "## Modify context and remove unnecessary keys from the JSON-LD file"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 122,
+   "id": "26fc24a1-fa8d-4de5-b57f-36fa8fb0c935",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import urllib.request, json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 123,
+   "id": "0080eaab-f809-43ba-a6dd-989b8809dac2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def remove_key (key_to_remove, data):\n",
+    "  if key_to_remove in data:\n",
+    "    #removed_value = data.pop(key_to_remove)\n",
+    "    del data[key_to_remove]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 124,
+   "id": "302448cc-c8e8-4e45-bef9-951002e9f43b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def replace_string_in_json(search_value, replace_value, data):\n",
+    "  if isinstance(data, dict):  # Check if data is a dictionary\n",
+    "    for key, value in data.items():\n",
+    "      if isinstance(value, str) and search_value in value:  # Check if value is a string containing search_value\n",
+    "        data[key] = value.replace(search_value, replace_value)\n",
+    "      elif isinstance(value, (dict, list)):  # Recursively call for nested dictionaries or lists\n",
+    "        replace_string_in_json(search_value, replace_value, value)\n",
+    "  elif isinstance(data, list):  # Check if data is a list\n",
+    "    for item in data:\n",
+    "      replace_string_in_json(search_value, replace_value, item) # Recursively call for list items"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 148,
+   "id": "a16b30ca-a972-4ca2-89d0-89d07ced82c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process_context(types_data, bs_ns, bs_url) :\n",
+    "    #remove all DDE bioschemas namespaces\n",
+    "    remove_key(\"bioschemas\", types_data[\"@context\"])\n",
+    "    remove_key(\"bioschemasdrafts\", types_data[\"@context\"])\n",
+    "    remove_key(\"bioschemastypes\", types_data[\"@context\"])\n",
+    "    remove_key(\"bioschemastypesdrafts\", types_data[\"@context\"])\n",
+    "    remove_key(\"bioschemasdeprecated\", types_data[\"@context\"])\n",
+    "    \n",
+    "    #also remove schema as it should point to https://schema.org rather than http://schema.org\n",
+    "    remove_key(\"schema\", types_data[\"@context\"])\n",
+    "    \n",
+    "    #add the canonical Bioschemas types namespace and corresponding url\n",
+    "    types_data[\"@context\"][\"bioschemas\"] = \"https://bioschemas.org/terms/\"\n",
+    "    try :\n",
+    "        types_data[\"@context\"][bs_ns]\n",
+    "    except KeyError :\n",
+    "        types_data[\"@context\"][bs_ns] = bs_url\n",
+    "    \n",
+    "    #also add back schema pointing to https://schema.org\n",
+    "    types_data[\"@context\"][\"schema\"] = \"https://schema.org/\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 141,
+   "id": "fc96c453-ad88-4c2b-a6be-816df8309041",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process_graph(types_data, bs_ns, dde_ns) :\n",
+    "    #remove additional_type from every element in the graph, used on the website for navigation but not needed in the JSON-LD\n",
+    "    for item in types_data[\"@graph\"]:\n",
+    "        remove_key(\"additional_type\", item)\n",
+    "        remove_key(\"schema:additionalType\", item)\n",
+    "\n",
+    "    #now use the new bioschemas namespace instedad the old one bioschemastypes\n",
+    "    replace_string_in_json(dde_ns, bs_ns, types_data[\"@graph\"])\n",
+    "    replace_string_in_json(\"bioschemastypes\", \"bioschemas\", types_data[\"@graph\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 133,
+   "id": "5f7464e0-a992-41a7-8970-4f12dd2d4661",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process_json(types_data, bs_ns, bs_url, dde_ns) :\n",
+    "    process_context(types_data, bs_ns, bs_url)\n",
+    "    process_graph(types_data, bs_ns, dde_ns)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 134,
+   "id": "259dc83d-6037-4788-9f46-8d9ffa40a3a3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process_all_json(origin, destination, bs_ns, bs_url, dde_ns) :\n",
+    "    #Get the bioschemas types file from the Bioschemas DDE GitHub repository\n",
+    "    with urllib.request.urlopen(origin) as types_file:\n",
+    "        types_data = json.load(types_file)\n",
+    "    \n",
+    "    process_json(types_data, bs_ns, bs_url, dde_ns)\n",
+    "    \n",
+    "    #save modified json to local temp file\n",
+    "    with open(destination, 'w') as temp_types_file:\n",
+    "        json.dump(types_data, temp_types_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bbd8fffc-e218-407a-8e13-04ddc2c7620b",
+   "metadata": {},
+   "source": [
+    "### Process JSON-LD file for Bioschemas types"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 149,
+   "id": "710ea645-7525-4009-bd5e-b122c475bb5c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "origin = \"https://github.com/BioSchemas/bioschemas-dde/raw/main/bioschemastypes.json\"\n",
+    "destination = \"bioschemas_types_temp.json\"\n",
+    "bs_ns = \"bioschemas\"\n",
+    "bs_url = \"https://bioschemas.org/terms/\"\n",
+    "dde_ns = \"bioschemastypes\"\n",
+    "dde_url = \"https://discovery.biothings.io/view/bioschemastypes/\"\n",
+    "w3id_url = \"https://w3id.org/bioschemas/terms/\"\n",
+    "process_all_json(origin, destination, bs_ns, bs_url, dde_ns)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a4cc29b8-92ab-4473-97f4-5934780a56fc",
+   "metadata": {},
+   "source": [
+    "### Process JSON-LD file for Bioschemas draft types"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 150,
+   "id": "7705bc06-06dc-4b53-9a35-4894c594ebd0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "origin = \"https://github.com/BioSchemas/bioschemas-dde/raw/main/bioschemastypesdrafts.json\"\n",
+    "destination = \"bioschemas_draft_types_temp.json\"\n",
+    "bs_ns = \"bioschemas_draft\"\n",
+    "bs_url = \"https://bioschemas.org/draft_terms/\"\n",
+    "dde_ns = \"bioschemastypesdrafts\"\n",
+    "dde_url = \"https://discovery.biothings.io/view/bioschemastypesdrafts/\"\n",
+    "w3id_url = \"https://w3id.org/bioschemas/draft_terms/\"\n",
+    "process_all_json(origin, destination, bs_ns, bs_url, dde_ns)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "4b4b51d5-7200-40f4-9844-b094507caa2b",
+   "metadata": {},
+   "source": [
+    "## Add same-as to DDE and w3id"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "90246e73-5b99-42b6-9c54-28a2fc147ea0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from rdflib import Graph\n",
+    "from rdflib import URIRef\n",
+    "from rdflib.namespace import RDF, RDFS, OWL"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "id": "8f5d0652-2029-4cfd-b9cc-9faa9964214f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def add_equivalence(g, eq_dde_type, eq_w3id_type, eq_dde_type_draft, eq_w3id_type_draft, bs_ns, bs_ns_draft) :\n",
+    "\n",
+    "    #Classes and properties already aincluded in schema.org\n",
+    "    types_in_schema = [\"BioChemEntity\", \"ChemicalSubstance\", \"Gene\", \"MolecularEntity\", \"Protein\", \"Taxon\"]\n",
+    "    properties_in_schema = [\"bioChemInteraction\", \"bioChemSimilarity\", \"biologicalRole\", \"hasBioChemEntityPart\", \"hasMolecularFunction\", \"hasRepresentation\", \"isEncodedByBioChemEntity\", \"isInvolvedInBiologicalProcess\", \"isLocatedInSubcellularLocation\", \"isPartOfBioChemEntity\", \"taxonomicRange\", \"alternativeOf\", \"encodesBioChemEntity\", \"expressedIn\", \"hasBioPolymerSequence\", \"chemicalComposition\", \"chemicalRole\", \"potentialUse\", \"chemicalRole\", \"inChi\", \"inChiKey\", \"iupacName\", \"molecularFormula\", \"molecularWeight\", \"monoisotopicMolecularWeight\", \"smiles\", \"childTaxon\", \"parentTaxon\", \"taxonRank\"]\n",
+    "    eq_schema = URIRef(\"https://schema.org/\")\n",
+    "    \n",
+    "    #iterates over all classes in the bioschemas namespace\n",
+    "    rdfs_class = URIRef(\"http://www.w3.org/2000/01/rdf-schema#Class\")\n",
+    "    eq_class = URIRef(\"http://www.w3.org/2002/07/owl#equivalentClass\")        \n",
+    "    for s in g.subjects(object=rdfs_class, unique=True) :\n",
+    "        sufix = str(s).split('/')[-1]\n",
+    "        #print(\"type: \", sufix)\n",
+    "        if bs_ns in s :\n",
+    "            g.add((s, eq_class, eq_dde_type + sufix))\n",
+    "            g.add((s, eq_class, eq_w3id_type + sufix))\n",
+    "            if sufix in types_in_schema :\n",
+    "                g.add((s, eq_class, eq_schema + sufix))\n",
+    "        if bs_ns_draft in s :\n",
+    "            g.add((s, eq_class, eq_dde_type_draft + sufix))\n",
+    "            g.add((s, eq_class, eq_w3id_type_draft + sufix))      \n",
+    "    \n",
+    "    #iterates over all properties in the bioschemas namespace\n",
+    "    schema_domain = URIRef(\"https://schema.org/domainIncludes\")\n",
+    "    eq_prop = URIRef(\"http://www.w3.org/2002/07/owl#equivalentProperty\")\n",
+    "    for s in g.subjects(predicate=schema_domain, unique=True) :\n",
+    "        sufix = str(s).split('/')[-1]\n",
+    "        #print(\"property: \", sufix)\n",
+    "        if bs_ns in s :\n",
+    "            g.add((s, eq_prop, eq_dde_type + sufix))\n",
+    "            g.add((s, eq_prop, eq_w3id_type + sufix))\n",
+    "            if sufix in properties_in_schema :\n",
+    "                g.add((s, eq_prop, eq_schema + sufix))\n",
+    "        if bs_ns_draft in s :\n",
+    "            g.add((s, eq_prop, eq_dde_type_draft + sufix))\n",
+    "            g.add((s, eq_prop, eq_w3id_type_draft + sufix))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "c47ba42a-79a9-416c-bd63-32d72df73210",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def process_equivalences(g_file, json_file, ttl_file) :\n",
+    "    #equivalences for types\n",
+    "    eq_dde_type = URIRef(\"https://discovery.biothings.io/view/bioschemastypes/\")\n",
+    "    eq_w3id_type = URIRef(\"https://w3id.org/bioschemas/terms/\")\n",
+    "    \n",
+    "    #equivalences for draft types\n",
+    "    eq_dde_type_draft = URIRef(\"https://discovery.biothings.io/view/bioschemastypesdrafts/\")\n",
+    "    eq_w3id_type_draft = URIRef(\"https://w3id.org/bioschemas/draft_terms/\")\n",
+    "    \n",
+    "    #bioschemas namespace for types and draft types\n",
+    "    bs_ns = \"https://bioschemas.org/terms/\"\n",
+    "    bs_ns_draft = \"https://bioschemas.org/draft_terms/\"\n",
+    "\n",
+    "    #load graph\n",
+    "    g = Graph()\n",
+    "    g.parse(g_file)\n",
+    "    \n",
+    "    #add equivalences\n",
+    "    add_equivalence(g, eq_dde_type, eq_w3id_type, eq_dde_type_draft, eq_w3id_type_draft, bs_ns, bs_ns_draft)\n",
+    "    \n",
+    "    #save as json\n",
+    "    g.serialize(destination=json_file, format=\"json-ld\", auto_compact=True, indent=2)\n",
+    "    #save as turtle\n",
+    "    g.serialize(destination=ttl_file, format=\"turtle\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "7515e020-7489-4238-b879-156fe1d61003",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "g_file = \"bioschemas_types_temp.json\"\n",
+    "json_file=\"bioschemas_types_temp.jsonld\"\n",
+    "ttl_file=\"bioschemas_types_temp.ttl\"\n",
+    "process_equivalences(g_file, json_file, ttl_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "f02a0c63-c83c-476a-86e9-5b3e07c8142c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "g_file = \"bioschemas_draft_types_temp.json\"\n",
+    "json_file=\"bioschemas_draft_types_temp.jsonld\"\n",
+    "ttl_file=\"bioschemas_draft_types_temp.ttl\"\n",
+    "process_equivalences(g_file, json_file, ttl_file)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fcd72bdf-31c1-4d07-bac2-941b8b00703f",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}