Skip to content

Commit 0183c77

Browse files
committed
Refine DE/ZA fusion converters per review feedback
- Use crop extension for crop fields
- Remove unnecessary reprojection and area conversion
- Rely on framework handling for extra columns
1 parent df2dbec commit 0183c77

File tree

4 files changed

+11
-37
lines changed

4 files changed

+11
-37
lines changed

fiboa_cli/datasets/de_fusion.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from fiboa_cli.conversion.fiboa_converter import FiboaBaseConverter
22

33

4+
CROP_EXTENSION = "https://fiboa.org/crop-extension/v0.2.0/schema.yaml"
5+
6+
47
class DeFusionConverter(FiboaBaseConverter):
58
sources = {
69
"https://data.source.coop/esa/fusion-competition/br-17E-243N-crop-labels-test-2019.geojson": "de_test_2019.geojson",
@@ -17,6 +20,8 @@ class DeFusionConverter(FiboaBaseConverter):
1720
provider = "ESA Fusion Competition via Source Cooperative <https://source.coop/esa/fusion-competition>"
1821
attribution = "https://data.source.coop/esa/fusion-competition"
1922
license = "CC-BY-4.0"
23+
extensions = {CROP_EXTENSION}
24+
area_is_in_ha = False
2025
columns = {
2126
"geometry": "geometry",
2227
"id": "id",
@@ -27,12 +32,6 @@ class DeFusionConverter(FiboaBaseConverter):
2732
column_additions = {
2833
"determination:datetime": "2019-01-01T00:00:00Z",
2934
}
30-
missing_schemas = {
31-
"properties": {
32-
"crop:code": {"type": "uint16"},
33-
"crop:name": {"type": "string"},
34-
}
35-
}
3635

3736
def _normalize_geojson_properties(self, feature):
3837
# These GeoJSON files have no top-level feature id, only fid inside properties.
@@ -42,12 +41,4 @@ def _normalize_geojson_properties(self, feature):
4241
return feature
4342

4443
def migrate(self, gdf):
45-
# Reproject from EPSG:25833 to WGS84 as required by fiboa spec
46-
if gdf.crs is not None and gdf.crs.to_epsg() != 4326:
47-
gdf = gdf.to_crs("EPSG:4326")
48-
49-
# Convert area from m² to hectares
50-
gdf["SHAPE_AREA"] = gdf["SHAPE_AREA"] / 10000
51-
5244
return super().migrate(gdf)
53-

fiboa_cli/datasets/de_fusion_ml.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,6 @@ def file_migration(self, gdf, path, uri, layer=None):
1616
def migrate(self, gdf):
1717
# Assign split from temp marker
1818
gdf["split"] = gdf["_source_split"].astype(object)
19-
20-
# Build unique IDs from split + fid before dropping the temp marker.
21-
# Using split as prefix avoids collisions between train/test files
22-
# since both files contain overlapping fid values.
19+
# Build unique IDs from split + fid to avoid collisions between files
2320
gdf["id"] = gdf["_source_split"] + "_" + gdf["fid"].astype(str)
24-
25-
gdf = gdf.drop(columns=["_source_split"])
2621
return super().migrate(gdf)

fiboa_cli/datasets/za_fusion.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
from fiboa_cli.conversion.fiboa_converter import FiboaBaseConverter
22

33

4+
CROP_EXTENSION = "https://fiboa.org/crop-extension/v0.2.0/schema.yaml"
5+
6+
47
class ZaFusionConverter(FiboaBaseConverter):
58
sources = {
69
"https://data.source.coop/esa/fusion-competition/sa-19E-258N-crop-labels-train-2017.geojson": "za_train_258N.geojson",
@@ -18,6 +21,8 @@ class ZaFusionConverter(FiboaBaseConverter):
1821
provider = "ESA Fusion Competition via Source Cooperative <https://source.coop/esa/fusion-competition>"
1922
attribution = "https://data.source.coop/esa/fusion-competition"
2023
license = "CC-BY-4.0"
24+
extensions = {CROP_EXTENSION}
25+
area_is_in_ha = False
2126
columns = {
2227
"geometry": "geometry",
2328
"id": "id",
@@ -28,25 +33,11 @@ class ZaFusionConverter(FiboaBaseConverter):
2833
column_additions = {
2934
"determination:datetime": "2017-01-01T00:00:00Z",
3035
}
31-
missing_schemas = {
32-
"properties": {
33-
"crop:code": {"type": "uint16"},
34-
"crop:name": {"type": "string"},
35-
}
36-
}
3736

3837
def _normalize_geojson_properties(self, feature):
39-
# These GeoJSON files have no top-level feature id, only fid inside properties.
4038
if "id" not in feature["properties"]:
4139
feature["properties"]["id"] = feature["properties"].get("fid", None)
4240
return feature
4341

4442
def migrate(self, gdf):
45-
# Reproject from EPSG:32734 to WGS84 as required by fiboa spec
46-
if gdf.crs is not None and gdf.crs.to_epsg() != 4326:
47-
gdf = gdf.to_crs("EPSG:4326")
48-
49-
# Convert area from m² to hectares
50-
gdf["SHAPE_AREA"] = gdf["SHAPE_AREA"] / 10000
51-
5243
return super().migrate(gdf)

fiboa_cli/datasets/za_fusion_ml.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,16 +5,13 @@
55
class ZaFusionMlConverter(MlSplitsMixin, ZaFusionConverter):
66

77
def file_migration(self, gdf, path, uri, layer=None):
8-
# train files contain "train", test file contains "test"
98
if "train" in path:
109
gdf["_source_split"] = "train"
1110
else:
1211
gdf["_source_split"] = "test"
1312
return super().file_migration(gdf, path, uri, layer)
1413

1514
def migrate(self, gdf):
16-
# Assign split from temp marker and build unique IDs
1715
gdf["split"] = gdf["_source_split"].astype(object)
1816
gdf["id"] = gdf["_source_split"] + "_" + gdf["fid"].astype(str)
19-
gdf = gdf.drop(columns=["_source_split"])
2017
return super().migrate(gdf)

0 commit comments

Comments
 (0)