diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 850be2b4..f698e541 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -55,9 +55,10 @@ jobs:
         run: |
           python manage.py migrate
-      - name: Load testdata to see if it is up to date with the Django migrations
+      - name: Load all test data to see if it is up to date with the Django migrations
         run: |
-          python manage.py loaddata fixtures/test_data.json
+          python manage.py loaddata fixtures/test_data_optimap.json
+          python manage.py loaddata fixtures/test_data_partners.json
       - name: Run deploy checks
         run: |
diff --git a/README.md b/README.md
index 9110e74b..64ee8999 100644
--- a/README.md
+++ b/README.md
@@ -80,10 +80,10 @@ python manage.py dumpdata --exclude=auth --exclude=contenttypes | jq > fixtures/
 #### Loading Test Data
-To load the test data into your database, run:
+To load the test data into your database, run the following command, choosing one of the existing fixtures:
 ```bash
-python manage.py loaddata fixtures/test_data.json
+python manage.py loaddata fixtures/test_data_optimap.json  # or fixtures/test_data_partners.json
 ```
 #### Adding New Test Data
diff --git a/fixtures/test_data.json b/fixtures/test_data_optimap.json
similarity index 85%
rename from fixtures/test_data.json
rename to fixtures/test_data_optimap.json
index 123637aa..b0f8dab0 100644
--- a/fixtures/test_data.json
+++ b/fixtures/test_data_optimap.json
@@ -1,7 +1,24 @@
 [
+  {
+    "model": "publications.source",
+    "pk": 9,
+    "fields": {
+      "name": "OPTIMAP Test Journal",
+      "issn_l": null,
+      "openalex_id": null,
+      "openalex_url": null,
+      "publisher_name": null,
+      "works_count": null,
+      "homepage_url": "http://optimap.science",
+      "abbreviated_title": null,
+      "is_oa": true,
+      "cited_by_count": null,
+      "is_preprint": true
+    }
+  },
   {
     "model": "publications.publication",
-    "pk": 1,
+    "pk": 900,
     "fields": {
       "status": "p",
       "title": "The First Article",
       "geometry": "SRID=4326;GEOMETRYCOLLECTION (POINT (7.595730774920725 51.96944097112328), POLYGON ((7.599984296478425 51.984257653537384, 7.5715788777530975 51.97057414651397, 7.570122189613329 51.950602187631205, 7.580319006590855 51.93825551711683, 7.609054957094401 51.93035649564658, 7.659674869951374 51.942256350721436, 7.6833460522228165 51.968514669138415, 7.665137450475669 51.99229098076532, 7.626171042736502 51.98982421450293, 7.599984296478425 51.984257653537384)))",
       "creationDate": "2022-10-24T12:10:53.086Z",
       "lastUpdate": "2022-10-24T12:10:53.086Z",
-      "source": "OPTIMAP Test Journal",
+      "source": 9,
       "timeperiod_startdate": "[\"2020-02-02\"]",
       "timeperiod_enddate": "[\"2022-02-20\"]",
       "provenance": "Manually added from file test_data.json using the Django management script."
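Note on the fixture split above: the two renamed fixtures can also be loaded and sanity-checked from a Django shell. The following is a minimal sketch, assuming migrations have already been applied; it uses only standard Django calls (`call_command`, `loaddata`) and the models and fixture names introduced in this diff. Because the pk ranges of the two files are disjoint, both can be loaded together, exactly as the CI job does.

```python
# Run inside `python manage.py shell` after `python manage.py migrate`.
from django.core.management import call_command

from publications.models import Publication, Source

# The pk ranges of the two fixtures do not overlap
# (source 9 / publications 900-902 vs. sources 1-6 / publications 101-602),
# so loading both is safe.
call_command("loaddata", "fixtures/test_data_optimap.json")
call_command("loaddata", "fixtures/test_data_partners.json")

# "source" is now a ForeignKey instead of the old free-text column, so every
# publication should resolve to a Source row (or None for orphaned drafts).
for pub in Publication.objects.select_related("source"):
    print(pub.pk, pub.title, "->", pub.source.name if pub.source else None)

print(Source.objects.count(), "sources loaded")
```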
@@ -20,7 +37,7 @@ }, { "model": "publications.publication", - "pk": 2, + "pk": 901, "fields": { "status": "p", "title": "Paper Two", @@ -31,7 +48,7 @@ "geometry": "SRID=4326;GEOMETRYCOLLECTION (LINESTRING (9.754609563397707 52.36630414438588, 9.813062794192035 52.41569645624003, 10.141300167111496 52.36904961184797, 10.518997966087937 52.330597538337116, 10.838242534270051 52.311358956793185, 11.058566250338231 52.220550088821824, 11.535184901427073 52.15714903642342, 12.272594889905236 52.24258143981572, 12.618817872299417 52.35532056817789, 12.911084026269464 52.2976119913985, 13.144896949445211 52.50063147184562, 13.396695482095708 52.517051586549286))", "creationDate": "2022-10-24T12:10:53.086Z", "lastUpdate": "2022-10-24T12:10:53.086Z", - "source": "OPTIMAP Test Journal", + "source": 9, "timeperiod_startdate": "[\"2010-01-01\"]", "timeperiod_enddate": "[\"2012-12-12\"]", "provenance": "Manually added from file test_data.json using the Django management script." @@ -39,7 +56,7 @@ }, { "model": "publications.publication", - "pk": 3, + "pk": 902, "fields": { "status": "p", "title": "Paper 3", @@ -50,7 +67,7 @@ "geometry": "SRID=4326;GEOMETRYCOLLECTION(POLYGON ((13.558502 50.990421, 13.558502 51.094036, 13.864746 51.094036, 13.864746 50.990421, 13.558502 50.990421)))", "creationDate": "2022-10-24T12:10:53.086Z", "lastUpdate": "2022-10-24T12:10:53.086Z", - "source": "OPTIMAP Test Journal", + "source": 9, "timeperiod_startdate": "[\"2023\"]", "timeperiod_enddate": "[\"2024\"]", "provenance": "Manually added from file test_data.json using the Django management script." diff --git a/fixtures/test_data_partners.json b/fixtures/test_data_partners.json new file mode 100644 index 00000000..a2b2fcbb --- /dev/null +++ b/fixtures/test_data_partners.json @@ -0,0 +1,533 @@ +[ + { + "model": "publications.source", + "pk": 1, + "fields": { + "name": "Volcanica", + "issn_l": "2610-3540", + "openalex_id": "https://openalex.org/S26103540", + "openalex_url": "https://openalex.org/S26103540", + "publisher_name": "Volcanica Society", + "works_count": 12, + "is_oa": false, + "cited_by_count": null + } + }, + { + "model": "publications.publication", + "pk": 101, + "fields": { + "status": "p", + "title": "Eruption Dynamics of New Fissure on Reykjanes Peninsula", + "abstract": "High-resolution analysis of lava flow progression.", + "publicationDate": "2023-01-15", + "doi": "10.5710/volcanica.12345", + "url": "https://www.jvolcanica.org/ojs/index.php/volcanica/article/view/12345", + "geometry": "SRID=4326;GEOMETRYCOLLECTION(POLYGON((-10 35, 30 35, 30 60, -10 60, -10 35)))", + "creationDate": "2024-06-10T08:00:00Z", + "lastUpdate": "2024-06-10T08:00:00Z", + "source": 1, + "timeperiod_startdate": ["2022-05-01"], + "timeperiod_enddate": ["2022-10-01"], + "provenance": "Imported from DOAJ on 2024-06-10." 
+ } + }, + { + "model": "publications.publication", + "pk": 102, + "fields": { + "status": "d", + "title": "Preliminary Survey of Volcanic Gas Emissions", + "abstract": "", + "publicationDate": null, + "doi": null, + "url": null, + "geometry": null, + "creationDate": "2024-06-10T08:05:00Z", + "lastUpdate": "2024-06-10T08:05:00Z", + "source": 1, + "timeperiod_startdate": [], + "timeperiod_enddate": [], + "provenance": "Draft entry for Volcanica" + } + }, + + { + "model": "publications.source", + "pk": 2, + "fields": { + "name": "Journal of Spatial Information Science", + "issn_l": "1948-660X", + "openalex_id": null, + "openalex_url": null, + "publisher_name": null, + "works_count": null, + "homepage_url": null, + "abbreviated_title": null, + "is_oa": false, + "cited_by_count": null + } + }, + { + "model": "publications.publication", + "pk": 201, + "fields": { + "status": "p", + "title": "Integrating Lidar and Photogrammetry for Urban Mapping", + "abstract": "An end-to-end pipeline for 3D city models.", + "publicationDate": "2022-08-20", + "doi": "10.5311/JSIS.2022.08.001", + "url": "https://josis.org/index.php/josis/article/view/08-001", + "geometry": "SRID=4326;GEOMETRYCOLLECTION(POLYGON((8.681 50.112,8.683 50.113,8.684 50.111,8.681 50.112)))", + "creationDate": "2024-06-10T09:00:00Z", + "lastUpdate": "2024-06-10T09:00:00Z", + "source": 2, + "timeperiod_startdate": ["2021-01-01"], + "timeperiod_enddate": ["2021-12-31"], + "provenance": "Imported from JOSIS archive" + } + }, + { + "model": "publications.publication", + "pk": 202, + "fields": { + "status": "p", + "title": "Spatial Database Performance Benchmarks", + "abstract": "Comparing PostGIS, Oracle Spatial, and SQL Server.", + "publicationDate": "2023-03-15", + "doi": null, + "url": null, + "geometry": null, + "creationDate": "2024-06-10T09:05:00Z", + "lastUpdate": "2024-06-10T09:05:00Z", + "source": 2, + "timeperiod_startdate": [], + "timeperiod_enddate": [], + "provenance": "Manual entry – missing DOI/URL" + } + }, + + { + "model": "publications.source", + "pk": 3, + "fields": { + "name": "European Journal of Transport and Infrastructure Research", + "issn_l": "1567-7133", + "openalex_id": null, + "openalex_url": null, + "publisher_name": null, + "works_count": null, + "homepage_url": null, + "abbreviated_title": null, + "is_oa": false, + "cited_by_count": null + } + }, + { + "model": "publications.publication", + "pk": 301, + "fields": { + "status": "p", + "title": "Electrification of European Rail Networks", + "abstract": "Economic and environmental impacts of railway electrification.", + "publicationDate": "2021-05-10", + "doi": "10.1234/EJTIR.2021.05.010", + "url": "https://journals.open.tudelft.nl/ejtir/article/view/2021-05-010", + "geometry": "SRID=4326;GEOMETRYCOLLECTION(POINT(52.379 4.900))", + "creationDate": "2024-06-10T10:00:00Z", + "lastUpdate": "2024-06-10T10:00:00Z", + "source": 3, + "timeperiod_startdate": ["2020-01-01"], + "timeperiod_enddate": ["2020-12-31"], + "provenance": "Imported from EJTIR online" + } + }, + { + "model": "publications.publication", + "pk": 302, + "fields": { + "status": "p", + "title": "Modal Split Analysis in Urban Regions", + "abstract": "A survey of travel behavior patterns.", + "publicationDate": null, + "doi": null, + "url": "https://journals.open.tudelft.nl/ejtir/article/view/2022-02-015", + "geometry": null, + "creationDate": "2024-06-10T10:05:00Z", + "lastUpdate": "2024-06-10T10:05:00Z", + "source": 3, + "timeperiod_startdate": [], + "timeperiod_enddate": [], + "provenance": "URL only; 
missing DOI & geometry"
+    }
+  },
+
+  {
+    "model": "publications.source",
+    "pk": 4,
+    "fields": {
+      "name": "AGILE: GIScience Series",
+      "issn_l": "2700-8150",
+      "openalex_id": null,
+      "openalex_url": null,
+      "publisher_name": null,
+      "works_count": null,
+      "homepage_url": null,
+      "abbreviated_title": null,
+      "is_oa": false,
+      "cited_by_count": null
+    }
+  },
+  {
+    "model": "publications.publication",
+    "pk": 401,
+    "fields": {
+      "status": "p",
+      "title": "Participatory Mapping for Coastal Resilience",
+      "abstract": "Case studies from Mediterranean communities.",
+      "publicationDate": "2020-11-05",
+      "doi": "10.5194/ags-2020-05",
+      "url": "https://agile-giss.copernicus.org/articles/20/05/2020/",
+      "geometry": "SRID=4326;GEOMETRYCOLLECTION(POLYGON((3.0 43.0,3.1 43.1,3.2 43.0,3.0 43.0)))",
+      "creationDate": "2024-06-10T11:00:00Z",
+      "lastUpdate": "2024-06-10T11:00:00Z",
+      "source": 4,
+      "timeperiod_startdate": ["2019-01-01"],
+      "timeperiod_enddate": ["2019-12-31"],
+      "provenance": "Imported from Copernicus AGILE archive"
+    }
+  },
+  {
+    "model": "publications.publication",
+    "pk": 402,
+    "fields": {
+      "status": "d",
+      "title": "Edge Computing for Geoanalytics",
+      "abstract": "",
+      "publicationDate": null,
+      "doi": null,
+      "url": null,
+      "geometry": null,
+      "creationDate": "2024-06-10T11:05:00Z",
+      "lastUpdate": "2024-06-10T11:05:00Z",
+      "source": 4,
+      "timeperiod_startdate": [],
+      "timeperiod_enddate": [],
+      "provenance": "Draft entry; minimal fields"
+    }
+  },
+  {
+    "model":
"publications.source", + "pk": 5, + "fields": { + "name": "arXiv", + "issn_l": null, + "openalex_id": null, + "openalex_url": null, + "publisher_name": null, + "works_count": null, + "homepage_url": "https://arxiv.org", + "abbreviated_title": null, + "is_oa": true, + "cited_by_count": null, + "is_preprint": true + } + }, + { + "model": "publications.publication", + "pk": 501, + "fields": { + "status": "p", + "title": "Quantum Entanglement in Large-Scale Systems", + "abstract": "An exploration of entanglement scaling in quantum networks.", + "publicationDate": null, + "doi": "10.48550/arXiv.2101.00001", + "url": "https://arxiv.org/abs/2101.00001", + "geometry": null, + "creationDate": "2024-06-10T12:00:00Z", + "lastUpdate": "2024-06-10T12:00:00Z", + "source": 5, + "timeperiod_startdate": [], + "timeperiod_enddate": [], + "provenance": "Imported from arXiv" + } + }, + { + "model": "publications.publication", + "pk": 502, + "fields": { + "status": "d", + "title": "Early Results on Neural Rendering", + "abstract": "", + "publicationDate": null, + "doi": null, + "url": null, + "geometry": null, + "creationDate": "2024-06-10T12:05:00Z", + "lastUpdate": "2024-06-10T12:05:00Z", + "source": 5, + "timeperiod_startdate": [], + "timeperiod_enddate": [], + "provenance": "Draft entry for preprint test" + } + }, + { + "model": "publications.source", + "pk": 6, + "fields": { + "name": "bioRxiv", + "issn_l": null, + "openalex_id": null, + "openalex_url": null, + "publisher_name": null, + "works_count": null, + "homepage_url": "https://www.biorxiv.org", + "abbreviated_title": null, + "is_oa": true, + "cited_by_count": null, + "is_preprint": true + } + }, + { + "model": "publications.publication", + "pk": 601, + "fields": { + "status": "p", + "title": "CRISPR-based Gene Drives in Mosquito Populations", + "abstract": "Modeling gene‐drive spread through wild Anopheles populations.", + "publicationDate": "2023-09-12", + "doi": "10.1101/2023.09.12.558924", + "url": "https://www.biorxiv.org/content/10.1101/2023.09.12.558924v1", + "geometry": null, + "creationDate": "2024-06-10T13:00:00Z", + "lastUpdate": "2024-06-10T13:00:00Z", + "source": 6, + "timeperiod_startdate": ["2023-01-01"], + "timeperiod_enddate": ["2023-12-31"], + "provenance": "Imported from bioRxiv" + } + }, + { + "model": "publications.publication", + "pk": 602, + "fields": { + "status": "d", + "title": "Preprint on Single-Cell Sequencing Pipelines", + "abstract": "", + "publicationDate": null, + "doi": null, + "url": null, + "geometry": null, + "creationDate": "2024-06-10T13:05:00Z", + "lastUpdate": "2024-06-10T13:05:00Z", + "source": 6, + "timeperiod_startdate": [], + "timeperiod_enddate": [], + "provenance": "Draft entry for bioRxiv preprint test" + } + } +] diff --git a/optimap/settings.py b/optimap/settings.py index cea378da..ff13fa82 100644 --- a/optimap/settings.py +++ b/optimap/settings.py @@ -37,6 +37,8 @@ # SECURITY WARNING: don't run with debug turned on in production! 
DEBUG = env('OPTIMAP_DEBUG', default=False) +BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + ALLOWED_HOSTS = [i.strip('[]') for i in env('OPTIMAP_ALLOWED_HOST', default='*').split(',')] OPTIMAP_SUPERUSER_EMAILS = [i.strip('[]') for i in env('OPTIMAP_SUPERUSER_EMAILS', default='').split(',')] @@ -205,6 +207,7 @@ OAI_PASSWORD = env("OPTIMAP_OAI_PASSWORD", default="") EMAIL_SEND_DELAY = 2 DATA_DUMP_INTERVAL_HOURS = 6 +OPENALEX_MAILTO = "login@optimap.science" MIDDLEWARE = [ 'django.middleware.cache.UpdateCacheMiddleware', diff --git a/publications/admin.py b/publications/admin.py index ec03f1fb..be359126 100644 --- a/publications/admin.py +++ b/publications/admin.py @@ -1,3 +1,6 @@ +import logging +logger = logging.getLogger(__name__) + from django.contrib import admin, messages from leaflet.admin import LeafletGeoAdmin from publications.models import Publication, Source, HarvestingEvent, BlockedEmail, BlockedDomain @@ -21,16 +24,19 @@ def make_draft(modeladmin, request, queryset): @admin.action(description="Trigger harvesting for selected sources") def trigger_harvesting_for_specific(modeladmin, request, queryset): - user = request.user - for source in queryset: - harvest_oai_endpoint(source.id, user) - + return trigger_harvesting_for_set(queryset, request) + @admin.action(description="Trigger harvesting for all sources") def trigger_harvesting_for_all(modeladmin, request, queryset): all_sources = Source.objects.all() + return trigger_harvesting_for_set(all_sources, request) + +def trigger_harvesting_for_set(sources, request): user = request.user - for source in all_sources: - harvest_oai_endpoint(source.id, user) + + for source in sources: + added, spatial, temporal = harvest_oai_endpoint(source.id, user) + logger.info(f"Harvested {added} publications from source {source.id} ({source.url_field}) of which {spatial} have spatial data and {temporal} have temporal data.") @admin.action(description="Schedule harvesting for selected sources") def schedule_harvesting(modeladmin, request, queryset): diff --git a/publications/api.py b/publications/api.py index b0cc96b8..0509b7f4 100644 --- a/publications/api.py +++ b/publications/api.py @@ -1,10 +1,14 @@ """Publications API URL Configuration.""" from rest_framework import routers - -from publications.viewsets import PublicationViewSet, SubscriptionViewset +from publications.viewsets import ( SourceViewSet, + PublicationViewSet, + SubscriptionViewSet, +) router = routers.DefaultRouter() -router.register(r"publications", PublicationViewSet) -router.register(r"subscriptions", SubscriptionViewset, basename='subscription') +router.register(r"sources", SourceViewSet, basename="source") +router.register(r"publications", PublicationViewSet, basename="publication") +router.register(r"subscriptions", SubscriptionViewSet, basename="subscription") + urlpatterns = router.urls diff --git a/publications/management/__init__.py b/publications/management/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/publications/management/commands/__init__.py b/publications/management/commands/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/publications/management/commands/sync_source_metadata.py b/publications/management/commands/sync_source_metadata.py new file mode 100644 index 00000000..4a5d4428 --- /dev/null +++ b/publications/management/commands/sync_source_metadata.py @@ -0,0 +1,112 @@ +# publications/management/commands/sync_source_metadata.py + +import logging +import time +import socket +import os 
+from django.core.management.base import BaseCommand +from django.contrib.gis.geos import Point +from geopy.geocoders import Nominatim +from geopy.exc import GeocoderServiceError +from publications.models import Source +import requests + +from pyalex import Sources # optional, install pyalex for client support + +logger = logging.getLogger(__name__) + +ISSN_ENDPOINT = "https://api.openalex.org/sources/issn:{issn}" + +class Command(BaseCommand): + help = "Full sync: metadata + geolocation + works list from OpenAlex." + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.geolocator = Nominatim(user_agent="optimap-sync") + + def fetch_metadata(self, issn: str) -> dict | None: + # Try PyAlex first + try: + client = Sources() + return client.get_single_source(issn, id_type="issn") + except Exception: + pass + + # Fallback to HTTP + try: + resp = requests.get(ISSN_ENDPOINT.format(issn=issn), timeout=10) + if resp.status_code == 302 and "Location" in resp.headers: + resp = requests.get(resp.headers["Location"], timeout=10) + if resp.status_code == 200: + return resp.json() + except requests.RequestException as e: + logger.debug("HTTP metadata fetch failed for %s: %s", issn, e) + return None + + def handle(self, *args, **options): + # DNS check + try: + ip = socket.gethostbyname("api.openalex.org") + self.stdout.write(f"DNS: api.openalex.org → {ip}") + except socket.error as e: + self.stderr.write(f"DNS lookup failed: {e}") + return + if ip.startswith(("127.", "10.", "192.168.", "172.16.", "::1")): + self.stderr.write("OpenAlex resolves to private IP—aborting.") + return + + session = requests.Session() + session.trust_env = False + + for src in Source.objects.exclude(issn_l__isnull=True): + self.stdout.write(f"Syncing ISSN={src.issn_l}") + data = self.fetch_metadata(src.issn_l) + if not data: + self.stderr.write(f"{src.issn_l}: no metadata\n") + continue + + defaults = { + "openalex_id": data.get("id"), + "openalex_url": data.get("id"), + "publisher_name": (data.get("host_organization") or {}).get("display_name") + or data.get("display_name"), + } + + # geolocation from OpenAlex + loc = data.get("location", {}) + lat, lon = loc.get("lat"), loc.get("lon") + if lat and lon: + defaults["geometry"] = Point(lon, lat) + elif not src.geometry: + # fallback geocode by name + try: + geo = self.geolocator.geocode(defaults["publisher_name"]) + if geo: + defaults["geometry"] = Point(geo.longitude, geo.latitude) + except GeocoderServiceError as ge: + logger.debug("Geocoding failed: %s", ge) + + # save metadata & geometry + src, _ = Source.objects.update_or_create(issn_l=src.issn_l, defaults=defaults) + self.stdout.write(f"{src.issn_l}: metadata & geo synced") + + # fetch works list + source_id = src.openalex_id.rstrip("/").split("/")[-1] + resp = session.get( + "https://api.openalex.org/works", + params={"filter": f"locations.source.id:{source_id}", "per-page": 100}, + timeout=30, + headers={"Accept": "application/json"}, + ) + if resp.status_code == 200: + results = resp.json().get("results", []) + ids = [w["id"] for w in results if w.get("id")] + src.articles = ids + src.save(update_fields=["articles"]) + self.stdout.write(f"{src.issn_l}: fetched {len(ids)} works") + else: + logger.warning("Works fetch %s → %s", resp.status_code, resp.text) + + time.sleep(0.2) + + self.stdout.write("Full sync complete.") diff --git a/publications/management/commands/update_openalex_journals.py b/publications/management/commands/update_openalex_journals.py new file mode 100644 index 
00000000..25314e89 --- /dev/null +++ b/publications/management/commands/update_openalex_journals.py @@ -0,0 +1,81 @@ +# publications/management/commands/update_openalex_journals.py + +import logging +import requests + +from django.core.management.base import BaseCommand +from django.db.models import Q +from publications.models import Source + +logger = logging.getLogger(__name__) + +ISSN_ENDPOINT = "https://api.openalex.org/sources/issn:{issn}" +SEARCH_ENDPOINT = "https://api.openalex.org/sources" + +def fetch_by_issn(issn: str) -> dict | None: + try: + resp = requests.get(ISSN_ENDPOINT.format(issn=issn), timeout=10) + if resp.status_code == 302 and "Location" in resp.headers: + resp = requests.get(resp.headers["Location"], timeout=10) + if resp.status_code == 200: + return resp.json() + except requests.RequestException as e: + logger.debug("ISSN lookup failed for %s: %s", issn, e) + return None + +def fetch_by_name(name: str) -> dict | None: + try: + resp = requests.get(SEARCH_ENDPOINT, + params={"filter": f"display_name.search:{name}"}, + timeout=10) + resp.raise_for_status() + results = resp.json().get("results", []) + return results[0] if results else None + except requests.RequestException as e: + logger.debug("Name lookup failed for %s: %s", name, e) + return None + +class Command(BaseCommand): + help = "Update Source metadata from OpenAlex (ISSN-based or name lookup)." + + def handle(self, *args, **options): + qs = Source.objects.filter(Q(issn_l__isnull=False) | Q(is_preprint=True)) + total = qs.count() + self.stdout.write(f"Found {total} source(s) to update.\n") + + for src in qs: + key = src.issn_l or src.name + self.stdout.write(f"[{key}] querying OpenAlex…") + + # log the ISSN or name we're using + logger.info("Fetching source metadata for %s", key) + + # fetch metadata + data = fetch_by_issn(src.issn_l) if src.issn_l else fetch_by_name(src.name) + if not data: + logger.info("Skipped ISSN=%s: no OpenAlex data", src.issn_l) + continue + + changed = False + def safe_upd(field: str, new): + nonlocal changed + old = getattr(src, field, None) + if new and new != old: + logger.info("ISSN=%s: %s changed %r → %r", src.issn_l, field, old, new) + setattr(src, field, new) + changed = True + + safe_upd("openalex_url", data.get("id")) + safe_upd("works_count", data.get("works_count")) + # host_organization may be nested under "host_organization" + host = data.get("host_organization") or {} + publisher = host.get("display_name") or data.get("display_name") + safe_upd("publisher_name", publisher) + + if changed: + src.save() + self.stdout.write(f"[{key}] saved\n") + else: + self.stdout.write(f"[{key}] nothing changed\n") + + self.stdout.write("Done updating OpenAlex metadata.") diff --git a/publications/migrations/0001_initial.py b/publications/migrations/0001_initial.py index 4f69d144..cf8b6388 100644 --- a/publications/migrations/0001_initial.py +++ b/publications/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 5.1.7 on 2025-04-08 10:02 +# Generated by Django 5.1.9 on 2025-06-17 09:37 import django.contrib.auth.models import django.contrib.auth.validators @@ -21,6 +21,31 @@ class Migration(migrations.Migration): ] operations = [ + migrations.CreateModel( + name='Source', + fields=[ + ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('url_field', models.URLField(max_length=999)), + ('harvest_interval_minutes', models.IntegerField(default=4320)), + ('last_harvest', models.DateTimeField(auto_now_add=True, null=True)), + 
('collection_name', models.CharField(blank=True, max_length=255, null=True)), + ('tags', models.CharField(blank=True, max_length=1024, null=True)), + ('is_preprint', models.BooleanField(default=False)), + ('name', models.CharField(max_length=255)), + ('issn_l', models.CharField(blank=True, max_length=9, null=True)), + ('openalex_id', models.CharField(blank=True, max_length=50, null=True)), + ('openalex_url', models.URLField(blank=True, max_length=512, null=True)), + ('publisher_name', models.CharField(blank=True, max_length=255, null=True)), + ('works_count', models.IntegerField(blank=True, null=True)), + ('homepage_url', models.URLField(blank=True, max_length=512, null=True)), + ('abbreviated_title', models.CharField(blank=True, max_length=255, null=True)), + ('is_oa', models.BooleanField(default=False)), + ('cited_by_count', models.IntegerField(blank=True, null=True)), + ], + options={ + 'ordering': ['name'], + }, + ), migrations.CreateModel( name='CustomUser', fields=[ @@ -35,8 +60,6 @@ class Migration(migrations.Migration): ('is_staff', models.BooleanField(default=False, help_text='Designates whether the user can log into this admin site.', verbose_name='staff status')), ('is_active', models.BooleanField(default=True, help_text='Designates whether this user should be treated as active. Unselect this instead of deleting accounts.', verbose_name='active')), ('date_joined', models.DateTimeField(default=django.utils.timezone.now, verbose_name='date joined')), - ('deleted', models.BooleanField(default=False)), - ('deleted_at', models.DateTimeField(blank=True, null=True)), ('groups', models.ManyToManyField(blank=True, related_name='publications_users', to='auth.group')), ('user_permissions', models.ManyToManyField(blank=True, related_name='publications_users_permissions', to='auth.permission')), ], @@ -81,19 +104,6 @@ class Migration(migrations.Migration): ('sent_by', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, to=settings.AUTH_USER_MODEL)), ], ), - migrations.CreateModel( - name='Source', - fields=[ - ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), - ('creationDate', models.DateTimeField(auto_now_add=True)), - ('lastUpdate', models.DateTimeField(auto_now=True)), - ('url_field', models.URLField(max_length=999)), - ('harvest_interval_minutes', models.IntegerField(default=4320)), - ('last_harvest', models.DateTimeField(auto_now_add=True, null=True)), - ('created_by', django_currentuser.db.models.fields.CurrentUserField(default=django_currentuser.middleware.get_current_authenticated_user, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='%(app_label)s_%(class)s_creator', to=settings.AUTH_USER_MODEL, verbose_name='Created by')), - ('updated_by', django_currentuser.db.models.fields.CurrentUserField(default=django_currentuser.middleware.get_current_authenticated_user, null=True, on_delete=django.db.models.deletion.CASCADE, on_update=True, related_name='%(app_label)s_%(class)s_updater', to=settings.AUTH_USER_MODEL, verbose_name='Updated by')), - ], - ), migrations.CreateModel( name='HarvestingEvent', fields=[ @@ -139,7 +149,6 @@ class Migration(migrations.Migration): ('creationDate', models.DateTimeField(auto_now_add=True)), ('lastUpdate', models.DateTimeField(auto_now=True)), ('doi', models.CharField(blank=True, max_length=1024, null=True, unique=True)), - ('source', models.CharField(blank=True, max_length=4096, null=True)), ('provenance', models.TextField(blank=True, null=True)), 
('publicationDate', models.DateField(blank=True, null=True)), ('abstract', models.TextField(blank=True, null=True)), @@ -150,6 +159,7 @@ class Migration(migrations.Migration): ('created_by', django_currentuser.db.models.fields.CurrentUserField(default=django_currentuser.middleware.get_current_authenticated_user, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='%(app_label)s_%(class)s_creator', to=settings.AUTH_USER_MODEL, verbose_name='Created by')), ('job', models.ForeignKey(blank=True, null=True, on_delete=django.db.models.deletion.CASCADE, related_name='publications', to='publications.harvestingevent')), ('updated_by', django_currentuser.db.models.fields.CurrentUserField(default=django_currentuser.middleware.get_current_authenticated_user, null=True, on_delete=django.db.models.deletion.CASCADE, on_update=True, related_name='%(app_label)s_%(class)s_updater', to=settings.AUTH_USER_MODEL, verbose_name='Updated by')), + ('source', models.ForeignKey(null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='publications', to='publications.source')), ], options={ 'ordering': ['-id'], diff --git a/publications/migrations/0002_source_collection_name_source_tags.py b/publications/migrations/0002_source_collection_name_source_tags.py deleted file mode 100644 index a8ed90e0..00000000 --- a/publications/migrations/0002_source_collection_name_source_tags.py +++ /dev/null @@ -1,33 +0,0 @@ -# Generated by Django 5.1.7 on 2025-04-21 19:25 - -from django.db import migrations, models - - -class Migration(migrations.Migration): - - dependencies = [ - ("publications", "0001_initial"), - ] - - operations = [ - migrations.AddField( - model_name="source", - name="collection_name", - field=models.CharField( - blank=True, - help_text="Identifier for a set or group of journals (e.g., 'Health Journals', 'TestBatch_Apr2025').", - max_length=255, - null=True, - ), - ), - migrations.AddField( - model_name="source", - name="tags", - field=models.CharField( - blank=True, - help_text="Comma-separated tags to provide additional context", - max_length=1024, - null=True, - ), - ), - ] diff --git a/publications/migrations/0003_remove_customuser_deleted_and_more.py b/publications/migrations/0003_remove_customuser_deleted_and_more.py deleted file mode 100644 index 37f01934..00000000 --- a/publications/migrations/0003_remove_customuser_deleted_and_more.py +++ /dev/null @@ -1,21 +0,0 @@ -# Generated by Django 5.1.9 on 2025-05-21 13:35 - -from django.db import migrations - - -class Migration(migrations.Migration): - - dependencies = [ - ('publications', '0002_source_collection_name_source_tags'), - ] - - operations = [ - migrations.RemoveField( - model_name='customuser', - name='deleted', - ), - migrations.RemoveField( - model_name='customuser', - name='deleted_at', - ), - ] diff --git a/publications/models.py b/publications/models.py index 9b928046..d176bd78 100644 --- a/publications/models.py +++ b/publications/models.py @@ -1,16 +1,19 @@ +import logging + +from django.contrib.auth.models import AbstractUser, Group, Permission from django.contrib.gis.db import models from django.contrib.postgres.fields import ArrayField +from django.conf import settings from django_currentuser.db.models import CurrentUserField from django_q.models import Schedule from django.utils.timezone import now from django.contrib.auth.models import AbstractUser, Group, Permission -from django.utils.timezone import now -# handle import/export relations, see 
https://django-import-export.readthedocs.io/en/stable/advanced_usage.html#creating-non-existent-relations from import_export import fields, resources from import_export.widgets import ForeignKeyWidget -from django.conf import settings +from django.core.exceptions import ValidationError +from stdnum.issn import is_valid as is_valid_issn +from django.contrib.gis.db import models as gis_models -import logging logger = logging.getLogger(__name__) STATUS_CHOICES = ( @@ -26,15 +29,12 @@ class CustomUser(AbstractUser): user_permissions = models.ManyToManyField(Permission, related_name="publications_users_permissions", blank=True) class Publication(models.Model): - # required fields title = models.TextField() status = models.CharField(max_length=1, choices=STATUS_CHOICES, default="d") - created_by = CurrentUserField( # see useful hint at https://github.com/zsoldosp/django-currentuser/issues/69 + created_by = CurrentUserField( verbose_name=("Created by"), related_name="%(app_label)s_%(class)s_creator", ) - - # automatic fields creationDate = models.DateTimeField(auto_now_add=True) lastUpdate = models.DateTimeField(auto_now=True) updated_by = CurrentUserField( @@ -42,103 +42,47 @@ class Publication(models.Model): related_name="%(app_label)s_%(class)s_updater", on_update=True, ) - - # optional fields + doi = models.CharField(max_length=1024, unique=True, blank=True, null=True) - source = models.CharField(max_length=4096, null=True, blank=True) # journal, conference, preprint repo, .. + source = models.ForeignKey('Source', on_delete=models.SET_NULL, null=True, related_name='publications') provenance = models.TextField(null=True, blank=True) publicationDate = models.DateField(null=True, blank=True) abstract = models.TextField(null=True, blank=True) url = models.URLField(max_length=1024, null=True, blank=True, unique=True) - geometry = models.GeometryCollectionField(verbose_name='Publication geometry/ies', srid = 4326, null=True, blank=True)# https://docs.openalex.org/api-entities/sources + geometry = models.GeometryCollectionField( + verbose_name='Publication geometry/ies', srid=4326, null=True, blank=True + ) timeperiod_startdate = ArrayField(models.CharField(max_length=1024, null=True), null=True, blank=True) timeperiod_enddate = ArrayField(models.CharField(max_length=1024, null=True), null=True, blank=True) - - # Linking to HarvestingEvent as "job" job = models.ForeignKey( - 'HarvestingEvent', - on_delete=models.CASCADE, - related_name='publications', - null=True, - blank=True + 'HarvestingEvent', on_delete=models.CASCADE, related_name='publications', null=True, blank=True ) - - def get_absolute_url(self): - return "/api/v1/publications/%i.json" % self.id - # http://localhost:8000/api/v1/publications/5.json - class Meta: ordering = ['-id'] constraints = [ models.UniqueConstraint(fields=['doi', 'url'], name='unique_publication_entry') ] - def __str__(self): - """Return string representation.""" return self.title -class Source(models.Model): - # automatic fields - creationDate = models.DateTimeField(auto_now_add=True) - lastUpdate = models.DateTimeField(auto_now=True) - created_by = CurrentUserField( - verbose_name=("Created by"), - related_name="%(app_label)s_%(class)s_creator", - ) - updated_by = CurrentUserField( - verbose_name=("Updated by"), - related_name="%(app_label)s_%(class)s_updater", - on_update=True, - ) - - url_field = models.URLField(max_length = 999) - harvest_interval_minutes = models.IntegerField(default=60*24*3) - last_harvest = models.DateTimeField(auto_now_add=True,null=True) - - 
collection_name = models.CharField( - max_length=255, - blank=True, - null=True, - help_text="Identifier for a set or group of journals (e.g., 'Health Journals', 'TestBatch_Apr2025')." - ) - tags = models.CharField( - max_length=1024, - blank=True, - null=True, - help_text="Comma-separated tags to provide additional context" - ) - - def save(self, *args, **kwargs): - super().save(*args, **kwargs) - Schedule.objects.filter(name=f"Harvest Source {self.id}").delete() # Avoid duplicates - Schedule.objects.create( - func='publications.tasks.harvest_oai_endpoint', - args=str(self.id), - schedule_type=Schedule.MINUTES, - minutes=self.harvest_interval_minutes, - name=f"Harvest Source {self.id}", - ) - - class Subscription(models.Model): user = models.ForeignKey(CustomUser, on_delete=models.CASCADE, related_name="subscriptions", null=True, blank=True) name = models.CharField(max_length=4096, default="default_subscription") - search_term = models.CharField(max_length=4096,null=True) + search_term = models.CharField(max_length=4096, null=True) timeperiod_startdate = models.DateField(null=True) timeperiod_enddate = models.DateField(null=True) region = models.GeometryCollectionField(null=True, blank=True) - subscribed = models.BooleanField(default=True) - - def __str__(self): - """Return string representation.""" - return self.name + subscribed = models.BooleanField(default=True) class Meta: ordering = ['name'] verbose_name = "subscription" + def __str__(self): + return self.name + class EmailLog(models.Model): TRIGGER_CHOICES = [ ("admin", "Admin Panel"), @@ -151,8 +95,8 @@ class EmailLog(models.Model): email_content = models.TextField(blank=True, null=True) sent_by = models.ForeignKey(CustomUser, null=True, blank=True, on_delete=models.SET_NULL) trigger_source = models.CharField(max_length=50, choices=TRIGGER_CHOICES, default="manual") - status = models.CharField(max_length=10, choices=STATUS_CHOICES, default="success") - error_message = models.TextField(null=True, blank=True) + status = models.CharField(max_length=10, choices=STATUS_CHOICES, default="success") + error_message = models.TextField(null=True, blank=True) def __str__(self): sender = self.sent_by.email if self.sent_by else "System" @@ -160,55 +104,36 @@ def __str__(self): @classmethod def log_email(cls, recipient, subject, content, sent_by=None, trigger_source="manual", status="success", error_message=None): - """Logs the sent email, storing who triggered it and how it was sent.""" cls.objects.create( recipient_email=recipient, subject=subject, sent_at=now(), email_content=content, sent_by=sent_by, - trigger_source=trigger_source, - status=status, - error_message=error_message, - + trigger_source=trigger_source, + status=status, + error_message=error_message, ) -class PublicationResource(resources.ModelResource): - #created_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, related_name='username') - #updated_by = models.ForeignKey(settings.AUTH_USER_MODEL, on_delete=models.CASCADE, related_name='username') - created_by = fields.Field( - column_name='created_by', - attribute='created_by', - widget=ForeignKeyWidget(CustomUser, field='username')) - updated_by = fields.Field( - column_name='updated_by', - attribute='updated_by', - widget=ForeignKeyWidget(settings.AUTH_USER_MODEL, field='username')) - - class Meta: - model = Publication - fields = ('created_by','updated_by',) - class HarvestingEvent(models.Model): source = models.ForeignKey('Source', on_delete=models.CASCADE, related_name='harvesting_events') - user = 
models.ForeignKey(CustomUser, on_delete=models.SET_NULL, null=True, blank=True) - started_at = models.DateTimeField(auto_now_add=True) - completed_at = models.DateTimeField(null=True, blank=True) + user = models.ForeignKey(CustomUser, on_delete=models.SET_NULL, null=True, blank=True) + started_at = models.DateTimeField(auto_now_add=True) + completed_at = models.DateTimeField(null=True, blank=True) status = models.CharField( max_length=16, - choices=( + choices=[ ('pending', 'Pending'), ('in_progress', 'In Progress'), ('completed', 'Completed'), ('failed', 'Failed'), - ), + ], default='pending' - ) + ) def __str__(self): return f"Harvesting Event ({self.status}) for {self.source.url_field} at {self.started_at}" - class UserProfile(models.Model): user = models.OneToOneField(CustomUser, on_delete=models.CASCADE) notify_new_manuscripts = models.BooleanField(default=False) @@ -231,3 +156,47 @@ class BlockedDomain(models.Model): def __str__(self): return self.domain + +class Source(models.Model): + url_field = models.URLField(max_length=999) + harvest_interval_minutes = models.IntegerField(default=60*24*3) + last_harvest = models.DateTimeField(auto_now_add=True, null=True) + collection_name = models.CharField(max_length=255, blank=True, null=True) + tags = models.CharField(max_length=1024, blank=True, null=True) + is_preprint = models.BooleanField(default=False) + name = models.CharField(max_length=255) + issn_l = models.CharField(max_length=9, blank=True, null=True) + openalex_id = models.CharField(max_length=50, blank=True, null=True) + openalex_url = models.URLField(max_length=512, blank=True, null=True) + publisher_name = models.CharField(max_length=255, blank=True, null=True) + works_count = models.IntegerField(blank=True, null=True) + homepage_url = models.URLField(max_length=512, blank=True, null=True) + abbreviated_title = models.CharField(max_length=255, blank=True, null=True) + + is_oa = models.BooleanField(default=False) + cited_by_count = models.IntegerField(blank=True, null=True) + + class Meta: + ordering = ['name'] + + def __str__(self): + return self.name + + @property + def works_api_url(self) -> str | None: + if not self.openalex_id: + return None + source_id = self.openalex_id.rstrip('/').split('/')[-1] + return f"https://api.openalex.org/works?filter=primary_location.source.id:{source_id}" + + def save(self, *args, **kwargs): + super().save(*args, **kwargs) + Schedule.objects.filter(name=f"Harvest Source {self.id}").delete() + Schedule.objects.create( + func='publications.tasks.harvest_oai_endpoint', + args=str(self.id), + schedule_type=Schedule.MINUTES, + minutes=self.harvest_interval_minutes, + name=f"Harvest Source {self.id}", + ) +Journal = Source \ No newline at end of file diff --git a/publications/serializers.py b/publications/serializers.py index cf6f663a..c84f66b7 100644 --- a/publications/serializers.py +++ b/publications/serializers.py @@ -1,47 +1,87 @@ """publications serializers.""" -from rest_framework_gis import serializers -from .models import Publication +from rest_framework import serializers +from rest_framework_gis.serializers import GeoFeatureModelSerializer +from rest_framework import serializers as drf_serializers +from .models import Publication, Subscription, Source from django.contrib.auth import get_user_model -User = get_user_model() -from publications.models import Publication,Subscription -from django.contrib.auth import get_user_model User = get_user_model() -class PublicationSerializer(serializers.GeoFeatureModelSerializer): - """publication 
GeoJSON serializer.""" +class SourceSerializer(serializers.ModelSerializer): + class Meta: + model = Source + fields = ( + "id", + "name", + "issn_l", + "openalex_id", + "openalex_url", + "publisher_name", + "works_count", + "works_api_url", + ) + + + +class PublicationSerializer(GeoFeatureModelSerializer): + source_details = serializers.SerializerMethodField() class Meta: - """publication serializer meta class.""" model = Publication - fields = ("id", "title" ,"abstract", "publicationDate", "url", "doi", "creationDate", "lastUpdate", "timeperiod_startdate", "timeperiod_enddate") geo_field = "geometry" - auto_bbox = True - -class SubscriptionSerializer(serializers.GeoFeatureModelSerializer): - """Subscription GeoJSON serializer.""" + auto_bbox = True + fields = [ + "id", + "title", + "abstract", + "publicationDate", + "doi", + "url", + "timeperiod_startdate", + "timeperiod_enddate", + "source_details", + ] + + def get_source_details(self, obj): + source = obj.source + if not source: + return {} + return SourceSerializer(source, context=self.context).data +class SubscriptionSerializer(GeoFeatureModelSerializer): class Meta: model = Subscription - fields = ("search_term","timeperiod_startdate","timeperiod_enddate","user") + fields = ( + "id", + "user", + "name", + "search_term", + "timeperiod_startdate", + "timeperiod_enddate", + "region", + "subscribed", + ) + geo_field = "region" auto_bbox = True - -class EmailChangeSerializer(serializers.ModelSerializer): + + +class EmailChangeSerializer(serializers.ModelSerializer): """Handles email change requests.""" class Meta: model = User - fields = ['email'] + fields = ["email"] def validate_email(self, value): """Ensure the new email is not already in use.""" if User.objects.filter(email=value).exists(): - raise serializers.ValidationError("This email is already registered.") + raise drf_serializers.ValidationError("This email is already registered.") return value -class UserSerializer(serializers.ModelSerializer): + +class UserSerializer(drf_serializers.ModelSerializer): class Meta: model = User - fields = ["id", "username", "email"] + fields = ["id", "username", "email"] diff --git a/publications/static/css/main.css b/publications/static/css/main.css index b8311a90..2a9d9d7d 100644 --- a/publications/static/css/main.css +++ b/publications/static/css/main.css @@ -175,3 +175,9 @@ main { background: #fff; } +.leaflet-popup-content { + max-width: 250px !important; + white-space: normal; + word-wrap: break-word; + overflow-wrap: break-word; +} diff --git a/publications/static/js/main.js b/publications/static/js/main.js index 369d1937..bfbd4141 100644 --- a/publications/static/js/main.js +++ b/publications/static/js/main.js @@ -1,86 +1,138 @@ -const dataCopyright = " | Publication metadata license: CC-0"; -const publications_url = '/api/v1/publications.json?limit=999999'; +// publications/static/js/main.js -async function initMap() { - var map = L.map("map"); - - var osmLayer = L.tileLayer('https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', { - attribution: 'Map data: © OpenStreetMap contributors' + dataCopyright, - maxZoom: 18 - }).addTo(map); - - //var esriWorldImageryLayer = L.tileLayer('https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}', { - // attribution: 'Tiles © Esri — Source: Esri, i-cubed, USDA, USGS, AEX, GeoEye, Getmapping, Aerogrid, IGN, IGP, UPR-EGP, and the GIS User Community' + dataCopyright, - // maxZoom: 18 - //}).addTo(map); - - var baseLayers = { - "OpenStreetMap": osmLayer, - //"Esri 
World Imagery": esriWorldImageryLayer
-    };
-
-    var publicationsGroup = new L.FeatureGroup();
-    map.addLayer(publicationsGroup);
-
-    var overlayMaps = {
-        "Publications": publicationsGroup
-    };
-
-    L.control.scale({ position: 'bottomright' }).addTo(map);
-    L.control.layers(baseLayers, overlayMaps).addTo(map);
-
-    var publications = await load_publications();
-    var publicationsLayer = L.geoJSON(publications, {
-        onEachFeature: publicationPopup
-    })
-    publicationsLayer.eachLayer(
-        function (l) {
-            publicationsGroup.addLayer(l);
-        });
+
+// Leaflet map initialization and popup rendering for publication points
+
+// 1. Load all publications from the API
+async function load_publications() {
+    const response = await fetch(publications_url);
+    const body = await response.json();
+    console.log(`OPTIMAP retrieved ${body.count} results.`);
+    return body.results;
+}
+
+// 2. Once the DOM is ready, initialize the map
+$(document).ready(function() {
+    initMap();
+});
+// API URL and copyright attribution
+const publications_url = '/api/v1/publications/?limit=999999';
+const dataCopyright =
+    " | Publication metadata license: CC-0";
+
+async function initMap() {
+    const map = L.map('map');
+
+    // Base layer: OpenStreetMap
+    const osmLayer = L.tileLayer(
+        'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png',
+        {
+            attribution:
+                'Map data: © OpenStreetMap contributors' +
+                dataCopyright,
+            maxZoom: 18,
+        }
+    ).addTo(map);
+
+    // Group to hold all publication markers
+    const publicationsGroup = new L.FeatureGroup().addTo(map);
+
+    // Controls: scale and layer switcher
+    L.control.scale({ position: 'bottomright' }).addTo(map);
+    L.control
+        .layers(
+            { 'OpenStreetMap': osmLayer },
+            { Publications: publicationsGroup }
+        )
+        .addTo(map);
+
+    // Fetch data and add to map
+    const pubs = await load_publications();
+    const pubsLayer = L.geoJSON(pubs, {
+        style: feature => ({
+            color: feature.properties.source_details.is_preprint ? 'orange' : 'blue',
+            weight: 3,
+            fillOpacity: 0.2,
+        }),
+        onEachFeature: publicationPopup
+    });
+    pubsLayer.eachLayer((layer) => publicationsGroup.addLayer(layer));
+
+    // Fit map to markers
+    if (publicationsGroup.getBounds().isValid()) {
         map.fitBounds(publicationsGroup.getBounds());
+    }
 }

+// 3. Popup content generator for each publication feature
 function publicationPopup(feature, layer) {
-    var popupContent = '<div>';
-    if (feature.properties['title']) {
-        popupContent += '<h3>'+ feature.properties['title']+'</h3>'
-    }
-    if (feature.properties['timeperiod_startdate'] && feature.properties['timeperiod_enddate']) {
-        popupContent += '<div>' + '<b>' + "Timeperiod : " + '</b>' + " "+ "from" + " "+ feature.properties['timeperiod_startdate'] + " " + "to" + " " + feature.properties['timeperiod_enddate'] +'</div>';
-    }
-    if (feature.properties['abstract']) {
-        popupContent += '<p>'+ feature.properties['abstract']+ '</p>'
-    }
-    if (feature.properties['url']) {
-        popupContent += '<a href="' + feature.properties['url'] + '" target="_blank">' + "Visit Article" + '</a>';
-    }
-    if (feature.properties && feature.properties.popupContent) {
-        popupContent += feature.properties.popupContent;
-    }
-    popupContent += '</div>';
-    layer.bindPopup(popupContent, {
-        maxHeight: 225
-    });
-}
-
-async function load_publications() {
-    response = await fetch(publications_url);
-    body = await response.json();
-    console.log('OPTIMAP retrieved ' + body.count + ' results.');
-    return body.results;
-}
-
-// render publications after page is loaded
-$(function () {
-    initMap();
-});
+    const p = feature.properties;
+    let html = '<div>';
+
+    // Title
+    if (p.title) html += `<h3>${p.title}</h3>`;
+
+    // Source details from nested object
+    if (p.source_details) {
+        const s = p.source_details;
+
+        // Display name
+        const name = s.display_name || s.name || 'Unknown';
+        html += `<div>Source: ${name}</div>`;
+
+        // Abbreviated title
+        if (s.abbreviated_title) {
+            html += `<div><em>${s.abbreviated_title}</em></div>`;
+        }
+
+        // Homepage link
+        if (s.homepage_url) {
+            html += `<div><a href="${s.homepage_url}" target="_blank">Visit journal site</a></div>`;
+        }
+
+        // ISSN-L link
+        if (s.issn_l) {
+            html +=
+                `<div>ISSN-L: ` +
+                `<a href="https://portal.issn.org/resource/ISSN/${s.issn_l}" target="_blank">${s.issn_l}</a></div>`;
+        }
+
+        // Publisher (only if different from display name)
+        if (s.publisher_name && s.publisher_name !== name) {
+            html += `<div>Publisher: ${s.publisher_name}</div>`;
+        }
+
+        // Open access status
+        if ('is_oa' in s) {
+            const status = s.is_oa ? 'Open Access' : 'Closed Access';
+            html += `<div>Access: ${status}</div>`;
+        }
+
+        // Citation count
+        if (s.cited_by_count != null) {
+            html += `<div>Cited by ${s.cited_by_count} works</div>`;
+        }
+
+        // Works count
+        if (s.works_count != null) {
+            html += `<div>${s.works_count} works hosted</div>`;
+        }
+    }
+
+    // Time period
+    if (p.timeperiod_startdate && p.timeperiod_enddate) {
+        html +=
+            `<div>Timeperiod: from ${p.timeperiod_startdate} to ${p.timeperiod_enddate}</div>`;
+    }
+    // Abstract
+    if (p.abstract) html += `<p>${p.abstract}</p>`;
+    // Article link
+    if (p.url) {
+        html += `<div><a href="${p.url}" target="_blank">Visit Article</a></div>`;
+    }
+    html += '</div>
'; + layer.bindPopup(html, { maxWidth: 300, maxHeight: 250 }); +} diff --git a/publications/tasks.py b/publications/tasks.py index 9b576fc4..09e907f5 100644 --- a/publications/tasks.py +++ b/publications/tasks.py @@ -7,228 +7,244 @@ import gzip import re import tempfile +import glob import time import calendar -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone as dt_timezone import xml.dom.minidom import requests +from pathlib import Path from bs4 import BeautifulSoup -from requests.auth import HTTPBasicAuth from urllib.parse import quote from django.conf import settings -from django.core.mail import send_mail, EmailMessage from django.core.serializers import serialize +from django.core.mail import send_mail, EmailMessage from django.contrib.gis.geos import GEOSGeometry, GeometryCollection from django.utils import timezone +from django_q.tasks import schedule +from django_q.models import Schedule from publications.models import Publication, HarvestingEvent, Source from .models import EmailLog, Subscription from django.contrib.auth import get_user_model -User = get_user_model() from django.urls import reverse -from urllib.parse import quote -from django_q.tasks import schedule -from django_q.models import Schedule -import glob -from pathlib import Path -from datetime import datetime, timezone as dt_timezone +from geopy.geocoders import Nominatim +from django.contrib.gis.geos import Point + +User = get_user_model() BASE_URL = settings.BASE_URL DOI_REGEX = re.compile(r'10\.\d{4,9}/[-._;()/:A-Z0-9]+', re.IGNORECASE) CACHE_DIR = Path(tempfile.gettempdir()) / 'optimap_cache' + def generate_data_dump_filename(extension: str) -> str: - """ - Returns: optimap_data_dump_YYYYMMDDThhmmss. - """ ts = datetime.now(dt_timezone.utc).strftime("%Y%m%dT%H%M%S") return f"optimap_data_dump_{ts}.{extension}" + def cleanup_old_data_dumps(directory: Path, keep: int): """ Deletes all files matching optimap_data_dump_* beyond the newest `keep` ones. 
""" pattern = str(directory / "optimap_data_dump_*") - files = sorted(glob.glob(pattern), reverse=True) # newest first + files = sorted(glob.glob(pattern), reverse=True) for old in files[keep:]: try: os.remove(old) except OSError: logger.warning("Could not delete old dump %s", old) -def extract_geometry_from_html(content): - for tag in content.find_all("meta"): - if tag.get("name", None) == "DC.SpatialCoverage": - data = tag.get("content", None) + +def extract_geometry_from_html(soup: BeautifulSoup): + for tag in soup.find_all("meta"): + if tag.get("name") == "DC.SpatialCoverage": try: - geom = json.loads(data) + geom = json.loads(tag["content"]) geom_data = geom["features"][0]["geometry"] - # preparing geometry data in accordance to geos API fields - type_geom= {'type': 'GeometryCollection'} - geom_content = {"geometries" : [geom_data]} - type_geom.update(geom_content) - geom_data_string= json.dumps(type_geom) - try : - geom_object = GEOSGeometry(geom_data_string) # GeometryCollection object - logging.debug('Found geometry: %s', geom_object) - return geom_object - except Exception as e: - logger.error("Cannot create geometry from string '%s': %s", geom_data_string, e) - except ValueError as e: - logger.error("Error loading JSON from %s: %s", tag.get("name"), e) - -def extract_timeperiod_from_html(content): - period = [None, None] - for tag in content.find_all("meta"): - if tag.get("name", None) in ['DC.temporal', 'DC.PeriodOfTime']: - data = tag.get("content", None) - period = data.split("/") - logging.debug('Found time period: %s', period) - break; - # returning arrays for array field in DB - return [period[0]], [period[1]] - -def parse_oai_xml_and_save_publications(content, event): + coll = {"type": "GeometryCollection", "geometries": [geom_data]} + return GEOSGeometry(json.dumps(coll)) + except Exception: + pass + return None + + +def extract_timeperiod_from_html(soup: BeautifulSoup): + for tag in soup.find_all("meta"): + if tag.get("name") in ("DC.temporal", "DC.PeriodOfTime"): + parts = tag["content"].split("/") + start = parts[0] if parts[0] else None + end = parts[1] if len(parts) > 1 and parts[1] else None + return ([start] if start else [None]), ([end] if end else [None]) # If missing, return [None] for start and [None] for end + return [None], [None] + +def parse_oai_xml_and_save_publications(content: bytes, event: HarvestingEvent) -> tuple[int, int, int]: + """ + Parse OAI-PMH XML, save Publication records linked to `event`, + and return counts: (added, spatial, temporal). 
+ """ try: - DOMTree = xml.dom.minidom.parseString(content) + dom = xml.dom.minidom.parseString(content) except Exception as e: logger.error("Error parsing XML: %s", e) - return + return 0, 0, 0 - collection = DOMTree.documentElement - records = collection.getElementsByTagName("record") - if not records: - logger.warning("No articles found in OAI-PMH response!") - return - for record in records: + for record in dom.getElementsByTagName("record"): try: - def get_text(tag_name): + def get_text(tag_name: str) -> str | None: nodes = record.getElementsByTagName(tag_name) - return nodes[0].firstChild.nodeValue.strip() if nodes and nodes[0].firstChild else None + return ( + nodes[0].firstChild.nodeValue.strip() + if nodes and nodes[0].firstChild else None + ) - # collect all dc:identifier values - id_nodes = record.getElementsByTagName("dc:identifier") - identifiers = [ + ids = [ n.firstChild.nodeValue.strip() - for n in id_nodes - if n.firstChild and n.firstChild.nodeValue + for n in record.getElementsByTagName("dc:identifier") + if n.firstChild ] - http_urls = [u for u in identifiers if u.lower().startswith("http")] - view_urls = [u for u in http_urls if "/view/" in u] - identifier_value = (view_urls or http_urls or [None])[0] - - title_value = get_text("dc:title") - abstract_text = get_text("dc:description") - journal_value = get_text("dc:publisher") - date_value = get_text("dc:date") - - doi_text = None - for ident in identifiers: - if match := DOI_REGEX.search(ident): - doi_text = match.group(0) + http_ids = [u for u in ids if u.lower().startswith("http")] + identifier = None + for u in http_ids: + if "/view/" in u: + identifier = u + break + if not identifier and http_ids: + identifier = http_ids[0] + + title = get_text("dc:title") + abstract = get_text("dc:description") + publisher_name = get_text("dc:publisher") + pub_date = get_text("dc:date") + + doi = None + for u in ids: + m = DOI_REGEX.search(u) + if m: + doi = m.group(0) break - if doi_text and Publication.objects.filter(doi=doi_text).exists(): - logger.info("Skipping duplicate publication (DOI): %s", doi_text) + if doi and Publication.objects.filter(doi=doi).exists(): continue - if identifier_value and Publication.objects.filter(url=identifier_value).exists(): - logger.info("Skipping duplicate publication (URL): %s", identifier_value) + if identifier and Publication.objects.filter(url=identifier).exists(): continue - # Skip records without a valid URL. 
 
-def harvest_oai_endpoint(source_id, user=None):
-    source = Source.objects.get(id=source_id)
-    event = HarvestingEvent.objects.create(source=source, status="in_progress")
-
-    try:
-        response = requests.get(source.url_field)
-        response.raise_for_status()
-
-        parse_oai_xml_and_save_publications(response.content, event)
-
-        event.status = "completed"
-        event.completed_at = timezone.now()
-        event.save()
-
-        new_count = Publication.objects.filter(job=event).count()
-        spatial_count = Publication.objects.filter(job=event).exclude(geometry__isnull=True).count()
-        temporal_count = Publication.objects.filter(job=event).exclude(timeperiod_startdate=[]).count()
-
-        subject = f"Harvesting Completed for {source.collection_name}"
-        completed_str = event.completed_at.strftime('%Y-%m-%d %H:%M:%S') if event.completed_at else 'N/A'
-        message = (
-            f"Harvesting job details:\n\n"
-            f"Number of added articles: {new_count}\n"
-            f"Number of articles with spatial metadata: {spatial_count}\n"
-            f"Number of articles with temporal metadata: {temporal_count}\n"
-            f"Collection used: {source.collection_name or 'N/A'}\n"
-            f"Journal: {source.url_field}\n"
-            f"Job started at: {event.started_at.strftime('%Y-%m-%d %H:%M:%S')}\n"
-            f"Job completed at: {completed_str}\n"
-        )
-
-        if user and user.email:
-            send_mail(
-                subject,
-                message,
-                settings.EMAIL_HOST_USER,
-                [user.email],
-                fail_silently=False,
-            )
-
-    except Exception as e:
-        logger.error("Harvesting failed for source %s: %s", source.url_field, str(e))
-        event.status = "failed"
-        event.completed_at = timezone.now()
-        event.save()
+def harvest_oai_endpoint(source_id: int, user=None) -> tuple[int, int, int] | None:
+    """
+    Fetch an OAI-PMH feed (HTTP or file://), create a HarvestingEvent,
+    parse and save publications, send a summary email, and mark completion.
+    """
+    try:
+        src = Source.objects.get(pk=source_id)
+    except Source.DoesNotExist:
+        logger.error("Source with id %s not found", source_id)
+        return None
+
+    if src.url_field.startswith("file://"):
+        path = src.url_field[7:]
+        try:
+            with open(path, "rb") as f:
+                content = f.read()
+        except Exception as e:
+            logger.error("Failed to read local file %s: %s", path, e)
+            return None
+    else:
+        try:
+            resp = requests.get(src.url_field, timeout=30)
+            resp.raise_for_status()
+            content = resp.content
+        except Exception as e:
+            logger.error("Harvesting failed for %s: %s", src.url_field, e)
+            return None
+
+    # Flag well-known preprint servers so the geocoding fallback can kick in.
+    low = (src.homepage_url or src.url_field or "").lower()
+    if any(x in low for x in ("arxiv.org", "biorxiv.org")) and not src.is_preprint:
+        src.is_preprint = True
+        src.save(update_fields=["is_preprint"])
+
+    event = HarvestingEvent.objects.create(
+        source=src,
+        user=user,
+        status="in_progress",
+    )
+    added, spatial, temporal = parse_oai_xml_and_save_publications(content, event)
+
+    if user:
+        subject = "Harvesting Completed"
+        body = (
+            f"Collection: {src.collection_name}\n"
+            f"Source URL: {src.url_field}\n"
+            f"Number of added articles: {added}\n"
+            f"Number of articles with spatial metadata: {spatial}\n"
+            f"Number of articles with temporal metadata: {temporal}\n"
+            f"Harvest started: {event.started_at:%Y-%m-%d}\n"
+        )
+        send_mail(subject, body, settings.EMAIL_HOST_USER, [user.email])
+
+    event.status = "completed"
+    event.completed_at = timezone.now()
+    event.save()
+
+    return added, spatial, temporal
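With the `file://` branch in place, the same entry point serves live endpoints and local dumps alike; a sketch of kicking off a harvest asynchronously via django-q (the source field values are illustrative, following the model as used above):

```python
from django_q.tasks import async_task
from publications.models import Source

src, _ = Source.objects.get_or_create(
    name="Volcanica",
    defaults={"url_field": "file:///tmp/volcanica_oai.xml"},  # or an https:// OAI-PMH URL
)
async_task("publications.tasks.harvest_oai_endpoint", src.id)
```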
+ """ try: - response = requests.get(source.url_field) - response.raise_for_status() - - parse_oai_xml_and_save_publications(response.content, event) - - event.status = "completed" - event.completed_at = timezone.now() - event.save() - - new_count = Publication.objects.filter(job=event).count() - spatial_count = Publication.objects.filter(job=event).exclude(geometry__isnull=True).count() - temporal_count = Publication.objects.filter(job=event).exclude(timeperiod_startdate=[]).count() - - subject = f"Harvesting Completed for {source.collection_name}" - completed_str = event.completed_at.strftime('%Y-%m-%d %H:%M:%S') if event.completed_at else 'N/A' - message = ( - f"Harvesting job details:\n\n" - f"Number of added articles: {new_count}\n" - f"Number of articles with spatial metadata: {spatial_count}\n" - f"Number of articles with temporal metadata: {temporal_count}\n" - f"Collection used: {source.collection_name or 'N/A'}\n" - f"Journal: {source.url_field}\n" - f"Job started at: {event.started_at.strftime('%Y-%m-%d %H:%M:%S')}\n" - f"Job completed at: {completed_str}\n" + src = Source.objects.get(pk=source_id) + except Source.DoesNotExist: + logger.error("Source with id %s not found", source_id) + return + if src.url_field.startswith("file://"): + path = src.url_field[7:] + try: + with open(path, "rb") as f: + content = f.read() + except Exception as e: + logger.error("Failed to read local file %s: %s", path, e) + return + else: + try: + resp = requests.get(src.url_field, timeout=30) + resp.raise_for_status() + content = resp.content + except Exception as e: + logger.error("Harvesting failed for %s: %s", src.url_field, e) + return + + low = (src.homepage_url or src.url_field or "").lower() + if any(x in low for x in ("arxiv.org", "biorxiv.org")) and not src.is_preprint: + src.is_preprint = True + src.save(update_fields=["is_preprint"]) + + event = HarvestingEvent.objects.create( + source=src, + user=user, + status="in_progress", + ) + added, spatial, temporal = parse_oai_xml_and_save_publications(content, event) + if user: + subject = "Harvesting Completed" + body = ( + f"Collection: {src.collection_name}\n" + f"Source URL: {src.url_field}\n" + f"Number of added articles: {added}\n" + f"Number of articles with spatial metadata: {spatial}\n" + f"Number of articles with temporal metadata: {temporal}\n" + f"Harvest started : {event.started_at:%Y-%m-%d}\n" ) - - if user and user.email: - send_mail( - subject, - message, - settings.EMAIL_HOST_USER, - [user.email], - fail_silently=False, - ) - - except Exception as e: - logger.error("Harvesting failed for source %s: %s", source.url_field, str(e)) - event.status = "failed" - event.completed_at = timezone.now() - event.save() + send_mail(subject, body, settings.EMAIL_HOST_USER, [user.email]) + + event.status = "completed" + event.completed_at = timezone.now() + event.save() + + return added, spatial, temporal + def send_monthly_email(trigger_source='manual', sent_by=None): recipients = User.objects.filter(userprofile__notify_new_manuscripts=True).values_list('email', flat=True) @@ -250,12 +266,10 @@ def send_monthly_email(trigger_source='manual', sent_by=None): [recipient], fail_silently=False, ) - EmailLog.log_email( recipient, subject, content, sent_by=sent_by, trigger_source=trigger_source, status="success" ) - time.sleep(settings.EMAIL_SEND_DELAY) - + time.sleep(settings.EMAIL_SEND_DELAY) except Exception as e: error_message = str(e) logger.error(f"Failed to send monthly email to {recipient}: {error_message}") @@ -263,40 +277,35 @@ def 
-
 def send_subscription_based_email(trigger_source='manual', sent_by=None, user_ids=None):
-    query = Subscription.objects.filter(subscribed=True, user__isnull=False)
+    query = Subscription.objects.filter(subscribed=True, user__isnull=False)
     if user_ids:
-        query = query.filter(user__id__in=user_ids)
+        query = query.filter(user__id__in=user_ids)
 
     for subscription in query:
-        user_email = subscription.user.email
+        user_email = subscription.user.email
         new_publications = Publication.objects.filter(
             geometry__intersects=subscription.region,
-            # publicationDate__gte=subscription.timeperiod_startdate,
-            # publicationDate__lte=subscription.timeperiod_enddate
         )
         if not new_publications.exists():
-            continue
+            continue
 
         unsubscribe_specific = f"{BASE_URL}{reverse('optimap:unsubscribe')}?search={quote(subscription.search_term)}"
         unsubscribe_all = f"{BASE_URL}{reverse('optimap:unsubscribe')}?all=true"
         subject = f"📚 New Manuscripts Matching '{subscription.search_term}'"
-
         bullet_list = "\n".join([f"- {pub.title}" for pub in new_publications])
-
         content = f"""Dear {subscription.user.username},
 
-        Here are the latest manuscripts matching your subscription:
+Here are the latest manuscripts matching your subscription:
 
-        {bullet_list}
+{bullet_list}
 
-        Manage your subscriptions:
-        Unsubscribe from '{subscription.search_term}': {unsubscribe_specific}
-        Unsubscribe from All: {unsubscribe_all}
-        """
+Manage your subscriptions:
+Unsubscribe from '{subscription.search_term}': {unsubscribe_specific}
+Unsubscribe from All: {unsubscribe_all}
+"""
 
         try:
             email = EmailMessage(subject, content, settings.EMAIL_HOST_USER, [user_email])
@@ -304,8 +313,7 @@ def send_subscription_based_email(trigger_source='manual', sent_by=None, user_id
             EmailLog.log_email(
                 user_email, subject, content, sent_by=sent_by,
                 trigger_source=trigger_source, status="success"
             )
-            time.sleep(settings.EMAIL_SEND_DELAY)
-
+            time.sleep(settings.EMAIL_SEND_DELAY)
         except Exception as e:
             error_message = str(e)
             logger.error(f"Failed to send subscription email to {user_email}: {error_message}")
@@ -313,11 +321,13 @@ def send_subscription_based_email(trigger_source='manual', sent_by=None, user_id
             user_email, subject, content, sent_by=sent_by,
             trigger_source=trigger_source, status="failed", error_message=error_message
         )
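The subscription match is purely spatial now that the commented-out date filters are gone; a minimal sketch of the `geometry__intersects` lookup it relies on (the bounding box is illustrative):

```python
# Hypothetical Django shell snippet.
from django.contrib.gis.geos import Polygon
from publications.models import Publication

region = Polygon.from_bbox((7.5, 51.9, 7.7, 52.0))  # illustrative subscriber region
matches = Publication.objects.filter(geometry__intersects=region)
print(matches.count())
```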
 
+
 def schedule_monthly_email_task(sent_by=None):
     if not Schedule.objects.filter(func='publications.tasks.send_monthly_email').exists():
         now = datetime.now()
-        last_day_of_month = calendar.monthrange(now.year, now.month)[1]  # Get last day of the month
-        next_run_date = now.replace(day=last_day_of_month, hour=23, minute=59)  # Run at the end of the last day
+        last_day_of_month = calendar.monthrange(now.year, now.month)[1]
+        next_run_date = now.replace(day=last_day_of_month, hour=23, minute=59)
         schedule(
             'publications.tasks.send_monthly_email',
             schedule_type='M',
@@ -327,11 +337,12 @@ def schedule_monthly_email_task(sent_by=None):
         )
         logger.info(f"Scheduled 'schedule_monthly_email_task' for {next_run_date}")
 
+
 def schedule_subscription_email_task(sent_by=None):
     if not Schedule.objects.filter(func='publications.tasks.send_subscription_based_email').exists():
         now = datetime.now()
-        last_day_of_month = calendar.monthrange(now.year, now.month)[1]  # Get last day of the month
-        next_run_date = now.replace(day=last_day_of_month, hour=23, minute=59)  # Run at the end of the last day
+        last_day_of_month = calendar.monthrange(now.year, now.month)[1]
+        next_run_date = now.replace(day=last_day_of_month, hour=23, minute=59)
         schedule(
             'publications.tasks.send_subscription_based_email',
             schedule_type='M',
@@ -340,7 +351,8 @@ def schedule_subscription_email_task(sent_by=None):
             kwargs={'trigger_source': 'scheduled', 'sent_by': sent_by.id if sent_by else None}
         )
         logger.info(f"Scheduled 'send_subscription_based_email' for {next_run_date}")
-
+
+
 def regenerate_geojson_cache():
     cache_dir = os.path.join(tempfile.gettempdir(), "optimap_cache")
     os.makedirs(cache_dir, exist_ok=True)
@@ -363,10 +375,11 @@ def regenerate_geojson_cache():
     size = os.path.getsize(json_path)
     logger.info("Cached GeoJSON at %s (%d bytes), gzipped at %s", json_path, size, gzip_path)
 
-   # remove old dumps beyond retention
+    # remove old dumps beyond retention
     cleanup_old_data_dumps(Path(cache_dir), settings.DATA_DUMP_RETENTION)
     return json_path
 
+
 def convert_geojson_to_geopackage(geojson_path):
     cache_dir = os.path.dirname(geojson_path)
     gpkg_filename = generate_data_dump_filename("gpkg")
@@ -378,13 +391,9 @@ def convert_geojson_to_geopackage(geojson_path):
             text=True,
         )
         logger.info("ogr2ogr output:\n%s", output)
-        # remove old dumps beyond retention
         return gpkg_path
     except subprocess.CalledProcessError as e:
+        logger.error("ogr2ogr conversion failed: %s", e.output)
         return None
-    # on success, return the filename so callers can stream it or inspect it
-    # remove old dumps beyond retention
-    return gpkg_path
 
 
 def regenerate_geopackage_cache():
diff --git a/publications/templates/user_settings.html b/publications/templates/user_settings.html
index f326676f..12656a42 100644
--- a/publications/templates/user_settings.html
+++ b/publications/templates/user_settings.html
@@ -240,7 +240,7 @@