diff --git a/cosima_cookbook/querying.py b/cosima_cookbook/querying.py
index 835d0c9..73d4a3b 100644
--- a/cosima_cookbook/querying.py
+++ b/cosima_cookbook/querying.py
@@ -7,8 +7,10 @@
 import logging
 import os.path
 import pandas as pd
+import sqlalchemy as sa
 from sqlalchemy import func, distinct, or_
 from sqlalchemy.orm import aliased
+from sqlalchemy.sql.expression import cast
 from sqlalchemy.sql.selectable import subquery
 import warnings
 import xarray as xr
@@ -272,6 +274,69 @@ def get_frequencies(session, experiment=None):
     return pd.DataFrame(q, columns=[c["name"] for c in q.column_descriptions])
 
 
+def get_ranges(session, experiment, variable, frequency, cellmethods=None):
+    """Return the contiguous time ranges covered by a variable's files.
+
+    Files matching ``experiment``, ``variable`` and ``frequency`` are ordered
+    by ``time_start``; a file whose ``time_start`` equals the previous file's
+    ``time_end`` extends the current range, and any other file starts a new
+    one.
+
+    Returns a DataFrame with one row per range and columns ``start`` and
+    ``end``, ordered by ``start``.
+
+    NOTE(review): ``cellmethods`` is accepted but not yet applied as a
+    filter, so files are matched regardless of cell methods -- confirm
+    whether that is intended.
+    """
+    # first, we query for the files with a flag indicating that the current row is not
+    # contiguous with its predecessor (the first row is compared against the
+    # lag default of "")
+    flag_q = (
+        session.query(
+            NCFile.time_start,
+            NCFile.time_end,
+            (
+                NCFile.time_start
+                != func.lag(NCFile.time_end, 1, "").over(order_by=NCFile.time_start)
+            ).label("flag"),
+        )
+        .join(NCFile.experiment)
+        .join(NCFile.ncvars)
+        .join(NCVar.variable)
+        .filter(NCExperiment.experiment == experiment)
+        .filter(NCFile.frequency == frequency)
+        # NOTE(review): NCVar.variable is used as a join target above and is
+        # compared to the ``variable`` argument below -- confirm this is valid
+        .filter(NCVar.variable == variable)
+        .order_by(NCFile.time_start)
+    ).subquery()
+
+    # now, by summing over the flag (as an integer), we get a column that allows us to group
+    # on consecutive files
+    group_q = session.query(
+        flag_q,
+        func.sum(cast(flag_q.c.flag, sa.Integer))
+        .over(order_by=flag_q.c.time_start)
+        .label("grp"),
+    ).subquery()
+
+    # we just need the smallest start time and largest end time out of each group
+    # to get its extent; order by the aggregated start, since the bare
+    # time_start column is neither grouped nor aggregated in this query
+    range_start = func.min(group_q.c.time_start)
+    q = (
+        session.query(
+            range_start,
+            func.max(group_q.c.time_end),
+        )
+        .group_by(group_q.c.grp)
+        .order_by(range_start)
+    )
+
+    return pd.DataFrame(q, columns=["start", "end"])
+
+
 def getvar(
     expt,
     variable,