Skip to content

Commit 6a9e9e8

Browse files
committed
Change sortBy to use set insertion for sorting.
1 parent 9bd53d2 commit 6a9e9e8

File tree

1 file changed

+2
-7
lines changed

1 file changed

+2
-7
lines changed

src/Data/DataFrame/Operations.hs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -258,18 +258,13 @@ sortBy :: T.Text
258258
-- Sort every column of the data frame by the values of the column named
-- 'sortColumnName', in the direction given by 'order'.
--
-- Each element of the sort column is paired with its row index and inserted
-- into a set; the set is then read out in ascending or descending order to
-- obtain the permutation of row indexes, which is applied to all columns.
-- Rows with equal sort values are ordered by their row index (and therefore
-- appear in reverse row order when sorting descending).
--
-- Calls 'error' when the data frame has no column named 'sortColumnName'.
sortBy sortColumnName order df =
  let
    -- Rebuild one column by reading its elements at the given row indexes.
    pick indexes (BoxedColumn column) = BoxedColumn $ indexes `getIndices` column
    pick indexes (UnboxedColumn column) = UnboxedColumn $ indexes `getIndicesUnboxed` column
  in case sortColumnName `M.lookup` columns df of
       -- Report the failure under this function's own name.
       Nothing -> error $ columnNotFound sortColumnName "sortBy" (columnNames df)
       Just (BoxedColumn (column :: V.Vector c)) ->
         let
           -- The pairs are (value, rowIndex): the index keeps equal values
           -- distinct, so the set never drops duplicates.
           indexes =
             map snd
               $ (if order == Ascending then S.toAscList else S.toDescList)
               $ VG.ifoldr (\i e acc -> S.insert (e, i) acc) S.empty column
         in df { columns = MS.map (pick indexes) (columns df) }
       Just (UnboxedColumn (column :: VU.Vector c)) ->
         let
           -- Same scheme as the boxed case, folding over the unboxed vector.
           indexes =
             map snd
               $ (if order == Ascending then S.toAscList else S.toDescList)
               $ VU.ifoldr (\i e acc -> S.insert (e, i) acc) S.empty column
         in df { columns = MS.map (pick indexes) (columns df) }
274269

275270
-- | O(log n) Get the number of elements in a given column.

0 commit comments

Comments
 (0)