This crate provides extra functions for DataFusion, specifically focusing on advanced aggregations. These extensions are inspired by other projects like DuckDB and Spark SQL.
-- Create a table with various columns containing strings, integers, floats, dates, and timesCREATETABLEtest_table ( utf8_colVARCHAR, int64_colINT, float64_col FLOAT, date64_colDATE, time64_colTIME)ASVALUES('apple',1,1.0,'2021-01-01','01:00:00'),('banana',2,2.0,'2021-01-02','02:00:00'),('apple',2,2.0,'2021-01-02','02:00:00'),('orange',3,3.0,'2021-01-03','03:00:00'),('banana',3,3.0,'2021-01-03','03:00:00'),('apple',3,3.0,'2021-01-03','03:00:00');-- Get the mode of the utf8_col columnSELECT mode(utf8_col)as mode_utf8FROM test_table;-- Results in-- +----------+-- | mode_utf8|-- +----------+-- | apple |-- +----------+-- Get the mode of the date64_col columnSELECT mode(date64_col)as mode_dateFROM test_table;-- Results in-- +-----------+-- | mode_date |-- +-----------+-- | 2021-01-03|-- +-----------+-- Get the mode of the time64_col columnSELECT mode(time64_col)as mode_timeFROM test_table;-- Results in-- +-----------+-- | mode_time |-- +-----------+-- | 03:00:00 |-- +-----------+-- Get the x value associated with the maximum y valueSELECT max_by(x, y)FROMVALUES (1,10), (2,5), (3,15), (4,8)as tab(x, y);-- Results in-- +---------------------+-- | max_by(tab.x,tab.y) |-- +---------------------+-- | 3 |-- +---------------------+-- Get the x value associated with the minimum y valueSELECT min_by(x, y)FROMVALUES (1,10), (2,5), (3,15), (4,8)as tab(x, y);-- Results in-- +---------------------+-- | min_by(tab.x,tab.y) |-- +---------------------+-- | 2 |-- +---------------------+