subtest other

# kv is the main fixture for this subtest. It is created empty and stays empty
# until the first INSERT further down, so that the queries in between can check
# what each aggregate returns over zero input rows.
statement ok
CREATE TABLE kv (
  k   INT PRIMARY KEY,
  v   INT,
  w   INT,
  s   STRING,
  i   INTERVAL,
  arr INT[]
)

# Aggregate functions return NULL if there are no rows, except for count and
# regr_count, which return 0.
query IIIIRRRRRRRRBBTIIRRRRRRRRRRIRR
SELECT min(1), max(1), count(1), sum_int(1), avg(1), sum(1), stddev(1), stddev_samp(1), stddev_pop(1),
var_samp(1), variance(1), var_pop(1), bool_and(true), bool_and(false), xor_agg(b'\x01'), bit_and(1), bit_or(1),
corr(1, 1), covar_pop(1, 1), covar_samp(1, 1), sqrdiff(1), regr_intercept(1, 1), regr_r2(1, 1), regr_slope(1, 1),
regr_sxx(1, 1), regr_sxy(1, 1), regr_syy(1, 1), regr_count(1, 1), regr_avgx(1, 1), regr_avgy(1, 1)
FROM kv
----
NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 0 NULL NULL

# Regression test for #29695
query T
SELECT min(NULL)
----
NULL

# Aggregate functions return NULL if there are no rows.
query T
SELECT array_agg(1) FROM kv
----
NULL

query T
SELECT array_cat_agg('{1}'::INT[]) FROM kv
----
NULL

query T
SELECT json_agg(1) FROM kv
----
NULL

query T
SELECT jsonb_agg(1) FROM kv
----
NULL

query TTTT
SELECT min(i), avg(i), max(i), sum(i) FROM kv
----
NULL NULL NULL NULL

query IIIIRRRRRRRBBTRRRRRRRRRRIRR
SELECT min(v), max(v), count(v), sum_int(1), avg(v), sum(v), stddev(v), stddev_pop(v), variance(v), var_pop(v), var_samp(v),
bool_and(v = 1), bool_and(v = 1), xor_agg(s::bytes), corr(v,k), sqrdiff(v), covar_pop(v, k), covar_samp(v, k),
regr_intercept(v, k), regr_r2(v, k), regr_slope(v, k), regr_sxx(v, k), regr_sxy(v, k), regr_syy(v, k), regr_count(v, k),
regr_avgx(v, k), regr_avgy(v, k)
FROM kv
----
NULL NULL 0 NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL 0 NULL NULL

query T
SELECT array_agg(v) FROM kv
----
NULL

query T
SELECT array_cat_agg(arr) FROM kv
----
NULL

query T
SELECT json_agg(v) FROM kv
----
NULL

query T
SELECT jsonb_agg(v) FROM kv
----
NULL

# Aggregate functions trigger aggregation and computation even when there is no source (no FROM clause).
query IIIIRRRRRRRBBTRRRRRRRRRRIRR
SELECT min(1), count(1), max(1), sum_int(1), avg(1)::float, sum(1), stddev_samp(1), stddev_pop(1), variance(1),
var_pop(1), var_samp(1), bool_and(true), bool_or(true), to_hex(xor_agg(b'\x01')), corr(1, 2), sqrdiff(1),
covar_pop(1, 2), covar_samp(1, 2), regr_intercept(1, 2), regr_r2(1, 2), regr_slope(1, 2), regr_sxx(1, 1), regr_sxy(1, 1),
regr_syy(1, 1), regr_count(1, 1), regr_avgx(1, 1), regr_avgy(1, 1)
----
1  1  1  1  1  1  NULL  0  NULL  0  NULL true  true  01  NULL 0 0 NULL NULL NULL NULL 0 0 0 1 1 1

# Aggregate functions trigger aggregation and computation even when there is no source (no FROM clause).
query T
SELECT array_agg(1)
----
{1}

query T
SELECT array_cat_agg('{1}'::INT[])
----
{1}

# Array-concatenating empty arrays results in an empty array.
query T
SELECT array_cat_agg(arr) FROM [SELECT ('{}'::INT[]) AS arr FROM generate_series(1, 3)];
----
{}

query T
SELECT json_agg(1)
----
[1]

query T
SELECT jsonb_agg(1)
----
[1]

# Some aggregate functions are not normalized to NULL when given a NULL
# argument.
query I
SELECT count(NULL)
----
0

query T
SELECT json_agg(NULL)
----
[null]

query T
SELECT jsonb_agg(NULL)
----
[null]

# This should ideally return {NULL}, but this is a pathological case, and
# Postgres has the same behavior, so it's sufficient for now.
statement error ambiguous call
SELECT array_agg(NULL)

# With an explicit cast, this works as expected.
query T
SELECT array_agg(NULL::TEXT)
----
{NULL}

# Regression test for #25724 (problem with typed NULLs and distsql planning).
# The previous query doesn't run under distsql.
query T
SELECT array_agg(NULL::TEXT) FROM (VALUES (1)) AS t(x)
----
{NULL}

# Check that COALESCE applied to an aggregate result over an empty input
# works properly.
query I
SELECT COALESCE(max(1), 0) FROM generate_series(1,0)
----
0

query I
SELECT count_rows() FROM generate_series(1,100)
----
100

# Same, using arithmetic on COUNT.
query I
SELECT 1 + count(*) FROM generate_series(1,0)
----
1

# Same, using an empty table.
# The following test *must* occur before the first INSERT to the tables,
# so that it can observe an empty table.
query II
SELECT count(*), COALESCE(max(k), 1) FROM kv
----
0 1

# Same, using a subquery. (#12705)
query I
SELECT (SELECT COALESCE(max(1), 0) FROM generate_series(1,0))
----
0

# Populate kv. The row with k = 5 leaves v, s, i and arr NULL so that the
# queries below also cover NULL inputs to the aggregates. Columns are listed
# explicitly so the statement does not depend on the table's column order.
statement ok
INSERT INTO kv (k, v, w, s, i, arr) VALUES
(1, 2, 3, 'a', '1min', '{1, 2, NULL}'),
(3, 4, 5, 'a', '2sec', '{3, 4, 5}'),
(5, NULL, 5, NULL, NULL, NULL),
(6, 2, 3, 'b', '1ms', '{6, 2, 3}'),
(7, 2, 2, 'b', '4 days', '{7, 2, 2}'),
(8, 4, 2, 'A', '3 years', '{NULL, 4, 2}')

# Aggregate functions trigger aggregation and computation for every row even when applied to a constant.
# NB: The XOR result is 00 because \x01 is XOR'd an even number of times.
query IIIIRRRRRRRBBTR
SELECT min(1), count(1), max(1), sum_int(1), avg(1)::float, sum(1), stddev(1), stddev_pop(1), variance(1)::float,
var_pop(1)::float, var_samp(1)::float, bool_and(true), bool_or(true), to_hex(xor_agg(b'\x01')), sqrdiff(1)
FROM kv
----
1 6 1 6 1 6 0 0 0 0 0 true true 00 0

# Aggregate functions trigger aggregation and computation for every row even when applied to a constant.
query T
SELECT array_agg(1) FROM kv
----
{1,1,1,1,1,1}

query T
SELECT array_cat_agg('{1, 2}'::INT[]) FROM kv
----
{1,2,1,2,1,2,1,2,1,2,1,2}

query T
SELECT json_agg(1) FROM kv
----
[1, 1, 1, 1, 1, 1]

query T
SELECT jsonb_agg(1) FROM kv
----
[1, 1, 1, 1, 1, 1]

# Even with no aggregate functions, grouping occurs in the presence of GROUP BY.
query I rowsort
SELECT 1 FROM kv GROUP BY v
----
1
1
1

# Presence of HAVING triggers aggregation, reducing results to one row (even without GROUP BY).
query I rowsort
SELECT 3 FROM kv HAVING TRUE
----
3

query error pgcode 42803 column "k" must appear in the GROUP BY clause or be used in an aggregate function
SELECT count(*), k FROM kv

query error unsupported comparison operator: <string> < <int>
SELECT count(*) FROM kv GROUP BY s < 5

query II rowsort
SELECT count(*), k FROM kv GROUP BY k
----
1 1
1 3
1 5
1 6
1 7
1 8

# GROUP BY specified using column index works.
query II rowsort
SELECT count(*), k FROM kv GROUP BY 2
----
1 1
1 3
1 5
1 6
1 7
1 8

query error aggregate functions are not allowed in GROUP BY
SELECT * FROM kv GROUP BY v, count(DISTINCT w)

query error aggregate functions are not allowed in GROUP BY
SELECT count(DISTINCT w) FROM kv GROUP BY 1

query error aggregate functions are not allowed in RETURNING
INSERT INTO kv (k, v) VALUES (99, 100) RETURNING sum(v)

query error column "v" does not exist
SELECT sum(v) FROM kv GROUP BY k LIMIT sum(v)

query error column "v" does not exist
SELECT sum(v) FROM kv GROUP BY k LIMIT 1 OFFSET sum(v)

query error aggregate functions are not allowed in VALUES
INSERT INTO kv (k, v) VALUES (99, count(1))

query error pgcode 42P10 GROUP BY position 5 is not in select list
SELECT count(*), k FROM kv GROUP BY 5

query error pgcode 42P10 GROUP BY position 0 is not in select list
SELECT count(*), k FROM kv GROUP BY 0

query error pgcode 42601 non-integer constant in GROUP BY
SELECT 1 GROUP BY 'a'

# Qualifying a name in the SELECT, the GROUP BY, both or neither should not affect validation.
query IT rowsort
SELECT count(*), kv.s FROM kv GROUP BY s
----
1 A
1 NULL
2 a
2 b

query IT rowsort
SELECT count(*), s FROM kv GROUP BY kv.s
----
1 A
1 NULL
2 a
2 b

query IT rowsort
SELECT count(*), kv.s FROM kv GROUP BY kv.s
----
1 A
1 NULL
2 a
2 b

query IT rowsort
SELECT count(*), s FROM kv GROUP BY s
----
1 A
1 NULL
2 a
2 b

# Grouping by more than one column works.
query III rowsort
SELECT v, count(*), w FROM kv GROUP BY v, w
----
2    1 2
2    2 3
4    1 2
4    1 5
NULL 1 5

# Grouping by more than one column using column numbers works.
query III rowsort
SELECT v, count(*), w FROM kv GROUP BY 1, 3
----
2    1 2
2    2 3
4    1 2
4    1 5
NULL 1 5

# Selecting and grouping on a function expression works.
query IT rowsort
SELECT count(*), upper(s) FROM kv GROUP BY upper(s)
----
1 NULL
2 B
3 A

# Selecting and grouping on a constant works.
query I
SELECT count(*) FROM kv GROUP BY 1+2
----
6

query I
SELECT count(*) FROM kv GROUP BY length('abc')
----
6

# Selecting a function of something which is grouped works.
query IT rowsort
SELECT count(*), upper(s) FROM kv GROUP BY s
----
1 A
1 NULL
2 A
2 B

# Selecting a value that is not grouped, even if a function of it is, does not work.
query error column "s" must appear in the GROUP BY clause or be used in an aggregate function
SELECT count(*), s FROM kv GROUP BY upper(s)

# Selecting and grouping on a more complex expression works.
query II rowsort
SELECT count(*), k+v FROM kv GROUP BY k+v
----
1 12
1 3
1 7
1 8
1 9
1 NULL


# Selecting a more complex expression, made up of things which are each grouped, works.
query II rowsort
SELECT count(*), k+v FROM kv GROUP BY k, v
----
1 12
1 3
1 7
1 8
1 9
1 NULL

query II rowsort
SELECT count(*), k+v FROM kv GROUP BY k
----
1  3
1  7
1  NULL
1  8
1  9
1  12

query error column "k" must appear in the GROUP BY clause or be used in an aggregate function
SELECT count(*), k+v FROM kv GROUP BY v

query error column "v" must appear in the GROUP BY clause or be used in an aggregate function
SELECT count(*), v/(k+v) FROM kv GROUP BY k+v

query error aggregate functions are not allowed in WHERE
SELECT k FROM kv WHERE avg(k) > 1

query error aggregate function calls cannot be nested
SELECT max(avg(k)) FROM kv

# Test case from #2761.
query II rowsort
SELECT count(kv.k) AS count_1, kv.v + kv.w AS lx FROM kv GROUP BY kv.v + kv.w
----
1  4
1  6
1  9
1  NULL
2  5

query TI rowsort
SELECT s, count(*) FROM kv GROUP BY s HAVING count(*) > 1
----
a 2
b 2

query TII rowsort
SELECT upper(s), count(DISTINCT s), count(DISTINCT upper(s)) FROM kv GROUP BY upper(s) HAVING count(DISTINCT s) > 1
----
A 2 1

# HAVING without GROUP BY filters the single aggregate row. min(v) over kv is
# 2, so the predicate min(v) > 2 is false and the result is empty.
query II rowsort
SELECT max(k), min(v) FROM kv HAVING min(v) > 2
----

# max(v) over kv is 4, so the predicate holds and the aggregate row is
# returned.
query II rowsort
SELECT max(k), min(v) FROM kv HAVING max(v) > 2
----
8 2

query error pgcode 42803 aggregate function calls cannot be nested
SELECT max(k), min(v) FROM kv HAVING max(min(v)) > 2

query error argument of HAVING must be type bool, not type int
SELECT max(k), min(v) FROM kv HAVING k

# Expressions listed in the HAVING clause must conform to same validation as the SELECT clause (grouped or aggregated).
query error column "k" must appear in the GROUP BY clause or be used in an aggregate function
SELECT 3 FROM kv GROUP BY v HAVING k > 5

# Special case for grouping on primary key.
query I nosort
SELECT 3 FROM kv GROUP BY k HAVING v > 2
----
3
3

query error column "k" must appear in the GROUP BY clause or be used in an aggregate function
SELECT k FROM kv HAVING k > 7

query error at or near ",": syntax error
SELECT count(*, 1) FROM kv

query I
SELECT count(*)
----
1

query I
SELECT count(k) FROM kv
----
6

query I
SELECT count(1)
----
1

query I
SELECT count(1) FROM kv
----
6

query error unknown signature: count\(int, int\)
SELECT count(k, v) FROM kv

query II
SELECT v, count(k) FROM kv GROUP BY v ORDER BY v
----
NULL 1
2 3
4 2

query II
SELECT v, count(k) FROM kv GROUP BY v ORDER BY v DESC
----
4 2
2 3
NULL 1

query II
SELECT v, count(k) FROM kv GROUP BY v ORDER BY count(k) DESC
----
2 3
4 2
NULL 1

query II
SELECT v, count(k) FROM kv GROUP BY v ORDER BY v-count(k)
----
NULL 1
2 3
4 2

query II
SELECT v, count(k) FROM kv GROUP BY v ORDER BY 1 DESC
----
4 2
2 3
NULL 1

query III colnames
SELECT count(*), count(k), count(kv.v) FROM kv
----
count  count  count
6      6      5

query I
SELECT count(kv.*) FROM kv
----
6

query III
SELECT count(DISTINCT k), count(DISTINCT v), count(DISTINCT (v)) FROM kv
----
6 2 2

query TIII rowsort
SELECT upper(s), count(DISTINCT k), count(DISTINCT v), count(DISTINCT (v)) FROM kv GROUP BY upper(s)
----
A    3 2 2
B    2 1 1
NULL 1 0 0


query I
SELECT count((k, v)) FROM kv
----
6

query I
SELECT count(DISTINCT (k, v)) FROM kv
----
6

query I
SELECT count(DISTINCT (k, (v))) FROM kv
----
6

query I
SELECT count(*) FROM kv a, kv b
----
36

query I
SELECT count(DISTINCT a.*) FROM kv a, kv b
----
6

# LIMIT and OFFSET apply to the output of the aggregation, which is one row
# here: LIMIT 1 keeps that row.
query I
SELECT count((k, v)) FROM kv LIMIT 1
----
6

# OFFSET 1 skips the only output row, so the result is empty.
query I
SELECT count((k, v)) FROM kv OFFSET 1
----

query I
SELECT count(k)+count(kv.v) FROM kv
----
11

query II
SELECT count(NULL::int), count((NULL, NULL))
----
0 1

query IIII
SELECT min(k), max(k), min(v), max(v) FROM kv
----
1 8 2 4

# Even if no input rows match, we expect a row (of nulls).
query IIII
SELECT min(k), max(k), min(v), max(v) FROM kv WHERE k > 8
----
NULL NULL NULL NULL

query TT
SELECT array_agg(k), array_agg(s) FROM (SELECT k, s FROM kv ORDER BY k)
----
{1,3,5,6,7,8}  {a,a,NULL,b,b,A}

query T
SELECT array_agg(k) || 1 FROM (SELECT k FROM kv ORDER BY k)
----
{1,3,5,6,7,8,1}

query T
SELECT array_cat_agg(arr) FROM (SELECT arr FROM kv ORDER BY k)
----
{1,2,NULL,3,4,5,6,2,3,7,2,2,NULL,4,2}

query T
SELECT array_agg(s) FROM kv WHERE s IS NULL
----
{NULL}

query T
SELECT array_cat_agg(arr) FROM kv WHERE arr IS NULL
----
NULL

query TTT
SELECT array_cat_agg(arr ORDER BY k), array_cat_agg(NULL::INT[]), array_cat_agg('{NULL, NULL}'::INT[]) FROM kv WHERE arr IS NOT NULL
----
{1,2,NULL,3,4,5,6,2,3,7,2,2,NULL,4,2}  NULL  {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}

query T
SELECT json_agg(s) FROM kv WHERE s IS NULL
----
[null]

query T
SELECT jsonb_agg(s) FROM kv WHERE s IS NULL
----
[null]

query RRRR
SELECT avg(k), avg(v), sum(k), sum(v) FROM kv
----
5.0000000000000000000  2.8000000000000000000  30  14

query TTTT
SELECT min(i), avg(i), max(i), sum(i) FROM kv
----
00:00:00.001  7 mons 6 days 19:12:12.4002  3 years  3 years 4 days 00:01:02.001

query RRRR
SELECT avg(k::decimal), avg(v::decimal), sum(k::decimal), sum(v::decimal) FROM kv
----
5.0000000000000000000  2.8000000000000000000  30  14

query RRRR
SELECT avg(DISTINCT k), avg(DISTINCT v), sum(DISTINCT k), sum(DISTINCT v) FROM kv
----
5.0000000000000000000  3.0000000000000000000  30  6

query R
SELECT avg(k) * 2.0 + max(v)::DECIMAL FROM kv
----
14.00000000000000000000

# Verify things work with distsql when some of the nodes emit no results in the
# local stage.
query R
SELECT avg(k) * 2.0 + max(v)::DECIMAL FROM kv WHERE w*2 = k
----
14.00000000000000000000

# Grouping columns can be eliminated, but should still return zero rows (i.e.
# shouldn't use scalar GroupBy).
query I
SELECT max(v) FROM kv GROUP BY k HAVING k=100
----

# Same query as above, but using scalar GroupBy (should return default row).
query I
SELECT max(v) FROM kv WHERE k=100
----
NULL

statement ok
CREATE TABLE abc (
  a VARCHAR PRIMARY KEY,
  b FLOAT,
  c BOOLEAN,
  d DECIMAL
)

statement ok
INSERT INTO abc VALUES ('one', 1.5, true, 5::decimal), ('two', 2.0, false, 1.1::decimal)

query TRBR
SELECT min(a), min(b), min(c), min(d) FROM abc
----
one 1.5 false 1.1

query TRBR
SELECT max(a), max(b), max(c), max(d) FROM abc
----
two 2 true 5

query RRRR
SELECT avg(b), sum(b), avg(d), sum(d) FROM abc
----
1.75  3.5  3.0500000000000000000  6.1

# Verify summing of intervals
statement ok
CREATE TABLE intervals (
  a INTERVAL PRIMARY KEY
)

statement ok
INSERT INTO intervals VALUES (INTERVAL '1 year 2 months 3 days 4 seconds'), (INTERVAL '2 year 3 months 4 days 5 seconds'), (INTERVAL '10000ms')

query T
SELECT sum(a) FROM intervals
----
3 years 5 mons 7 days 00:00:19


query error unknown signature: avg\(varchar\)
SELECT avg(a) FROM abc

query error unknown signature: avg\(bool\)
SELECT avg(c) FROM abc

query error unknown signature: avg\(tuple{varchar, bool}\)
SELECT avg((a,c)) FROM abc

query error unknown signature: sum\(varchar\)
SELECT sum(a) FROM abc

query error unknown signature: sum\(bool\)
SELECT sum(c) FROM abc

query error unknown signature: sum\(tuple{varchar, bool}\)
SELECT sum((a,c)) FROM abc

statement ok
CREATE TABLE xyz (
  x INT PRIMARY KEY,
  y INT,
  z FLOAT,
  w INT,
  INDEX xy (x, y),
  INDEX zyx (z, y, x),
  INDEX w (w),
  FAMILY (x),
  FAMILY (y),
  FAMILY (z)
)

statement ok
INSERT INTO xyz VALUES (1, 2, 3.0, NULL), (4, 5, 6.0, 2), (7, NULL, 8.0, 3)

query I
SELECT min(x) FROM xyz
----
1

query I
SELECT min(y) FROM xyz
----
2

query I
SELECT min(w) FROM xyz
----
2

query IF
SELECT min(x), max(z) FROM xyz
----
1  8

query IF
SELECT min(x)+1, max(z)+1 FROM xyz
----
2  9

query IFR
SELECT min(x), max(z), sum(x) FROM xyz
----
1  8  12

query II
SELECT min(y), max(y) FROM xyz WHERE x IN (0, 4, 7)
----
5  5

query II
SELECT min(x), max(x) FROM xyz WHERE x = 1
----
1  1

query FI
SELECT min(z), max(y) FROM xyz WHERE z IN (3.0, 6.0, 8.0)
----
3  5

query FI
SELECT max(z), min(x) FROM (SELECT x,y,z FROM xyz a) dt WHERE dt.y > 0
----
6  1

# The subquery evaluates to (8, 1), but no single row of xyz has z = 8 and
# x = 1, so the WHERE clause removes every row and both aggregates are NULL.
query FI
SELECT max(z), min(x) FROM xyz WHERE (z,x) = (SELECT max(z), min(x) FROM xyz)
----
NULL  NULL

# In HAVING the comparison is made against the aggregated values themselves,
# so it matches and the single result row is kept.
query FI
SELECT max(z), min(x) FROM xyz HAVING (max(z), min(x)) = (SELECT max(z), min(x) FROM xyz)
----
8  1

query I
SELECT min(x) FROM xyz WHERE x IN (0, 4, 7)
----
4

query I
SELECT max(x) FROM xyz
----
7

query I
SELECT min(y) FROM xyz WHERE x = 1
----
2

query I
SELECT max(y) FROM xyz WHERE x = 1
----
2

query I
SELECT min(y) FROM xyz WHERE x = 7
----
NULL

query I
SELECT max(y) FROM xyz WHERE x = 7
----
NULL

query I
SELECT min(x) FROM xyz WHERE (y, z) = (2, 3.0)
----
1

query I
SELECT max(x) FROM xyz WHERE (z, y) = (3.0, 2)
----
1

# VARIANCE/STDDEV

query RRR
SELECT var_samp(x), variance(y::decimal), round(var_samp(z), 14) FROM xyz
----
9 4.5 6.33333333333333

query R
SELECT variance(x) FROM xyz WHERE x = 10
----
NULL

query R
SELECT variance(x) FROM xyz WHERE x = 1
----
NULL

query RRR
SELECT var_pop(x), var_pop(y::decimal), round(var_pop(z), 14) FROM xyz
----
6 2.25 4.22222222222222

query R
SELECT var_pop(x) FROM xyz WHERE x = 10
----
NULL

query R
SELECT var_pop(x) FROM xyz WHERE x = 1
----
0

query RRR
SELECT stddev_samp(x), stddev(y::decimal), round(stddev_samp(z), 14) FROM xyz
----
3.0000000000000000000  2.1213203435596425732  2.51661147842358

query R
SELECT stddev(x) FROM xyz WHERE x = 1
----
NULL

query RRR
SELECT stddev_pop(x), stddev_pop(y::decimal), round(stddev_pop(z), 14) FROM xyz
----
2.4494897427831780982  1.5000000000000000000  2.05480466765633

query R
SELECT stddev_pop(x) FROM xyz WHERE x = 1
----
0

# Ensure subqueries don't trigger aggregation.
query B
SELECT x > (SELECT avg(0)) FROM xyz LIMIT 1
----
true

statement ok
DROP TABLE xyz

# SQRDIFF

statement ok
DROP TABLE IF EXISTS ifd;
CREATE TABLE ifd
(
    i int,
    f float,
    d decimal
);
INSERT INTO ifd (i, f, d)
VALUES (1, 1.1, 1.1),
       (2, 2.2, 2.2),
       (5, 3.0, 3.0),
       (10, 7.8, 7.8),
       (11, 9.0, 9.0),
       (18, 11.2, 11.2);

query FRF
SELECT sqrdiff(i), round(sqrdiff(f), 12), sqrdiff(d)
FROM ifd
----
206.8333333333333333333334 86.248333333333 86.24833333333333333333333

query FRF
SELECT sqrdiff(i), round(sqrdiff(f), 2), sqrdiff(d)
FROM ifd
WHERE i < 10
----
8.666666666666666666666666 1.82 1.82

# Clean up the fixture used by the SQRDIFF tests. The table created for them
# is named ifd; dropping "sqrdiff" (a function name, not a table in this file)
# was a no-op that left ifd behind.
statement ok
DROP TABLE IF EXISTS ifd

# Numerical stability test for VARIANCE/STDDEV.
# See https://www.johndcook.com/blog/2008/09/28/theoretical-explanation-for-numerical-results.
# Avoid using random() since we do not have the deterministic option to specify a pseudo-random seed yet.
# Note under distsql, this is non-deterministic since the running variance/stddev algorithms depend on
# the local sum of squared difference values which depend on how the data is distributed across the distsql nodes.
statement ok
CREATE TABLE mnop (
  m INT PRIMARY KEY,
  n FLOAT,
  o DECIMAL,
  p BIGINT
)

statement ok
INSERT INTO mnop (m, n) SELECT i, (1e9 + i/100.0)::float FROM
  generate_series(1, 100) AS i(i)

statement ok
UPDATE mnop SET o = n::decimal, p = (n * 10)::bigint

query RRR
SELECT round(variance(n), 2), round(variance(o), 2), round(variance(p)) FROM mnop
----
0.08 0.08 9

query RRR
SELECT round(var_pop(n), 2), round(var_pop(o), 2), round(var_pop(p)) FROM mnop
----
0.08 0.08 9

query RRR
SELECT round(stddev_samp(n), 2), round(stddev_samp(o), 2), round(stddev_samp(p)) FROM mnop
----
0.29 0.29 3

query RRR
SELECT round(stddev_pop(n), 2), round(stddev_pop(o), 2), round(stddev_pop(p)) FROM mnop
----
0.29 0.29 3

query RRR
SELECT avg(1::int)::float, avg(2::float)::float, avg(3::decimal)::float
----
1 2 3

query III
SELECT count(2::int), count(3::float), count(4::decimal)
----
1 1 1

query RRR
SELECT sum(1::int), sum(2::float), sum(3::decimal)
----
1 2 3

query RRR
SELECT variance(1::int), variance(1::float), variance(1::decimal)
----
NULL NULL NULL

query RRR
SELECT var_pop(1::int), var_pop(1::float), var_pop(1::decimal)
----
0  0  0

query RRR
SELECT stddev(1::int), stddev_samp(1::float), stddev(1::decimal)
----
NULL NULL NULL

query RRR
SELECT stddev_pop(1::int), stddev_pop(1::float), stddev_pop(1::decimal)
----
0  0  0

statement ok
CREATE TABLE bits (b INT)

query II
SELECT bit_and(b), bit_or(b) FROM bits
----
NULL NULL

statement ok
INSERT INTO bits VALUES (12), (25)

query II
SELECT bit_and(b), bit_or(b) FROM bits
----
8 29

statement ok
INSERT INTO bits VALUES(105)

query II
SELECT bit_and(b), bit_or(b) FROM bits
----
8 125

statement ok
INSERT INTO bits VALUES(NULL)

query II
SELECT bit_and(b), bit_or(b) FROM bits
----
8 125

# bool_and / bool_or, checked step by step as the contents of bools change.
statement ok
CREATE TABLE bools (b BOOL)

# Empty input: both aggregates are NULL.
query BB
SELECT bool_and(b), bool_or(b) FROM bools
----
NULL NULL

statement ok
INSERT INTO bools (b) VALUES (true), (true), (true)

# Only true values.
query BB
SELECT bool_and(b), bool_or(b) FROM bools
----
true true

statement ok
INSERT INTO bools (b) VALUES (false), (false)

# A mix of true and false values.
query BB
SELECT bool_and(b), bool_or(b) FROM bools
----
false true

# Remove the true rows, leaving only false values.
statement ok
DELETE FROM bools WHERE b

query BB
SELECT bool_and(b), bool_or(b) FROM bools
----
false false

query T
SELECT concat_agg(s) FROM (SELECT s FROM kv ORDER BY k)
----
aabbA

query T
SELECT json_agg(s) FROM (SELECT s FROM kv ORDER BY k)
----
["a", "a", null, "b", "b", "A"]

query T
SELECT jsonb_agg(s) FROM (SELECT s FROM kv ORDER BY k)
----
["a", "a", null, "b", "b", "A"]

# Verify that FILTER works.

statement ok
CREATE TABLE filter_test (
  k INT,
  v INT,
  mark BOOL
)

# The rows include a NULL in v (k = 5), a NULL in k (last row) and a single
# row with mark = false, so the FILTER predicates below are evaluated against
# NULL inputs as well as ordinary values.
statement ok
INSERT INTO filter_test (k, v, mark) VALUES
(1, 2, false),
(3, 4, true),
(5, NULL, true),
(6, 2, true),
(7, 2, true),
(8, 4, true),
(NULL, 4, true)

# FILTER should eliminate some results.
query II rowsort
SELECT v, count(*) FILTER (WHERE k > 5) FROM filter_test GROUP BY v
----
2 2
4 1
NULL 0

# Several aggregates in the same query, some with their own FILTER clause and
# one without.
query IBIII rowsort
SELECT
  v,
  mark,
  count(*) FILTER (WHERE k > 5),
  count(*),
  max(k) FILTER (WHERE k < 8)
FROM filter_test
GROUP BY v, mark
----
2 false 0 1 1
2 true 2 2 7
4 true 1 3 3
NULL true 0 1 5

query error FILTER specified but abs\(\) is not an aggregate function
SELECT k, abs(k) FILTER (WHERE k=1) FROM kv

query error at or near "filter": syntax error
SELECT k FILTER (WHERE k=1) FROM kv GROUP BY k

query error aggregate functions are not allowed in FILTER
SELECT v, count(*) FILTER (WHERE count(*) > 5) FROM filter_test GROUP BY v

# Tests with * inside GROUP BY.
query I nosort
SELECT 1 FROM kv GROUP BY kv.*
----
1
1
1
1
1
1

query R rowsort
SELECT sum(abc.d) FROM kv JOIN abc ON kv.k >= abc.d GROUP BY kv.*
----
1.1
6.1
6.1
6.1
6.1

# opt_test is used for tests around the single-row optimization for MIN/MAX.
statement ok
CREATE TABLE opt_test (k INT PRIMARY KEY, v INT, INDEX v(v))

statement ok
INSERT INTO opt_test VALUES (1, NULL), (2, 10), (3, NULL), (4, 5)

# Verify that we correctly add the v IS NOT NULL constraint (which restricts the span).
# Without the "v IS NOT NULL" constraint, this result would incorrectly be NULL.
query I
SELECT min(v) FROM opt_test
----
5

# Cross-check against a query without this optimization.
query I
SELECT min(v) FROM opt_test@opt_test_pkey
----
5

# Repeat test when there is an existing filter.
query I
SELECT min(v) FROM opt_test WHERE k <> 4
----
10

# Verify that we don't use the optimization if there is a GROUP BY.
query I rowsort
SELECT min(v) FROM opt_test GROUP BY k
----
NULL
NULL
5
10

query I rowsort
SELECT max(v) FROM opt_test GROUP BY k
----
NULL
NULL
5
10

statement ok
CREATE TABLE xor_bytes (a bytes, b int, c int)

statement ok
INSERT INTO xor_bytes VALUES
  (b'\x01\x01', 1, 3),
  (b'\x02\x01', 1, 1),
  (b'\x04\x01', 2, -5),
  (b'\x08\x01', 2, -1),
  (b'\x10\x01', 2, 0)

query TI
SELECT to_hex(xor_agg(a)), xor_agg(c) FROM xor_bytes
----
1f01 6

query TII
SELECT to_hex(xor_agg(a)), b, xor_agg(c) FROM xor_bytes GROUP BY b ORDER BY b
----
0300  1   2
1c01  2   4

statement error arguments to xor must all be the same length
SELECT xor_agg(i) FROM (VALUES (b'\x01'), (b'\x01\x01')) AS a(i)

# max/min over BOOL constants. The query returns one row with two columns, so
# the expected values are written on a single line.
query BB
SELECT max(true), min(true)
----
true true

# Fixtures for the tuple grouping and rendering tests: ab holds two INT rows
# (with a and b in separate column families) and xy holds two STRING rows.
statement ok
CREATE TABLE ab (
  a INT PRIMARY KEY,
  b INT,
  FAMILY (a),
  FAMILY (b)
);
INSERT INTO ab(a,b) VALUES (1,2), (3,4);
CREATE TABLE xy(x STRING, y STRING);
INSERT INTO xy(x, y) VALUES ('a', 'b'), ('c', 'd')

# Grouping and rendering tuples.
query T rowsort
SELECT (b, a) FROM ab GROUP BY (b, a)
----
(2,1)
(4,3)

query TT rowsort
SELECT min(y), (b, a)
 FROM ab, xy GROUP BY (x, (a, b))
----
b  (2,1)
d  (2,1)
b  (4,3)
d  (4,3)

# Test that ordering on GROUP BY columns is maintained.
statement ok
CREATE TABLE group_ord (
  x INT PRIMARY KEY,
  y INT,
  z INT,
  INDEX foo(z)
)

statement ok
INSERT INTO group_ord VALUES
(1, 2, 3),
(3, 4, 5),
(5, NULL, 5),
(6, 2, 3),
(7, 2, 2),
(8, 4, 2)

# The ordering is on all the GROUP BY columns, and isn't preserved after the
# aggregation.
query II rowsort
SELECT x, max(y) FROM group_ord GROUP BY x
----
1  2
3  4
5  NULL
6  2
7  2
8  4

# The ordering is on all the GROUP BY columns, and is preserved after the
# aggregation.
query II
SELECT x, max(y) FROM group_ord GROUP BY x ORDER BY x
----
1  2
3  4
5  NULL
6  2
7  2
8  4

# The ordering is on some of the GROUP BY columns, and isn't preserved after
# the aggregation.
query III rowsort
SELECT z, x, max(y) FROM group_ord GROUP BY x, z
----
5  3  4
3  6  2
3  1  2
5  5  NULL
2  7  2
2  8  4

# The ordering is on some of the GROUP BY columns, and is preserved after
# the aggregation.
query III
SELECT z, x, max(y) FROM group_ord GROUP BY x, z ORDER BY x
----
3  1  2
5  3  4
5  5  NULL
3  6  2
2  7  2
2  8  4

# If the underlying ordering isn't from the primary index, it needs to be hinted
# for now.
query II rowsort
SELECT z, max(y) FROM group_ord@foo GROUP BY z
----
5  4
2  4
3  2

# Join two aggregate subqueries whose inputs are ordered on their GROUP BY
# columns (primary index for x, index foo for z). The intent is for a merge
# join to be usable here; the subqueries need no ORDER BY of their own.
query IIII rowsort
SELECT *
FROM (SELECT x, max(y) FROM group_ord GROUP BY x)
INNER JOIN (SELECT z, min(y) FROM group_ord@foo GROUP BY z)
  ON x = z
----
3  4     3  2
5  NULL  5  4

# Test max and min when agg column is the second column in an index.
statement ok
CREATE TABLE index_tab (
  region STRING,
  data INT NOT NULL,
  INDEX (region, data)
)

statement ok
INSERT INTO index_tab
(VALUES
  ('US_WEST', 3),
  ('US_EAST', 23),
  ('US_EAST', -14),
  ('ASIA', 3294),
  ('ASIA', -3),
  ('US_WEST', 31),
  ('EUROPE', 123),
  ('US_EAST', -3000)
)

query I
SELECT max(data) FROM index_tab WHERE region = 'US_WEST' OR region = 'US_EAST'
----
31

query I
SELECT min(data) FROM index_tab WHERE region = 'US_WEST' OR region = 'US_EAST'
----
-3000

statement ok
DROP TABLE index_tab

# Regression test for #23798 until #10495 is fixed.
statement error function reserved for internal use
SELECT final_variance(1.2, 1.2, 123) FROM kv

# Regression test for #25533 (crash when propagating filter through GROUP BY).
query I nosort
SELECT 1 FROM kv GROUP BY v, w::DECIMAL HAVING w::DECIMAL > 1
----
1
1
1
1
1

# Regression test for distsql aggregator crash when using hash aggregation.
query IT rowsort
SELECT v, array_agg('a') FROM kv GROUP BY v
----
2     {a,a,a}
4     {a,a}
NULL  {a}

# Regression test for #26419
query I
SELECT 123 FROM kv ORDER BY max(v)
----
123

subtest statistics

# Fixture for the two-argument statistical aggregates. Each (y, x) sample is
# stored three times, as FLOAT (y, x), INT (int_y, int_x) and DECIMAL (dy, dx),
# so the same aggregate can be checked across argument types.
statement ok
CREATE TABLE statistics_agg_test (
  y float,
  x float,
  int_y int,
  int_x int,
  dy decimal,
  dx decimal
)

# Ten samples with repeated pairs, plus one all-NULL row.
statement ok
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,   10.0,    1,   10, 1.0,   10.0),
  (2.0,   25.0,    2,   25, 2.0,   25.0),
  (2.0,   25.0,    2,   25, 2.0,   25.0),
  (3.0,   40.0,    3,   40, 3.0,   40.0),
  (3.0,   40.0,    3,   40, 3.0,   40.0),
  (3.0,   40.0,    3,   40, 3.0,   40.0),
  (4.0,  100.0,    4,  100, 4.0,  100.0),
  (4.0,  100.0,    4,  100, 4.0,  100.0),
  (4.0,  100.0,    4,  100, 4.0,  100.0),
  (4.0,  100.0,    4,  100, 4.0,  100.0),
  (NULL,  NULL, NULL, NULL, NULL, NULL)

query FFFFF
SELECT corr(y, x), corr(int_y, int_x), corr(y, int_x), corr(int_y, x), corr(dy, dx)
FROM statistics_agg_test
----
0.9330078226479681 0.9330078226479681 0.9330078226479681 0.9330078226479681 0.9330078226479681

query FFFF
SELECT corr(y, dx), corr(int_y, dx), corr(dy, int_x), corr(dy, x)
FROM statistics_agg_test
----
0.9330078226479681 0.9330078226479681 0.9330078226479681 0.9330078226479681

query F
SELECT corr(DISTINCT y, x)
FROM statistics_agg_test
----
0.9326733179802503

query F
SELECT CAST(corr(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 30) AS decimal)
FROM statistics_agg_test
----
0.9326733179802503

query error pq: unknown signature: corr\(string, string\)
SELECT corr(y::string, x::string) FROM statistics_agg_test

# Add a row whose y and dy are roughly 1.8e308 to check corr with an extreme
# floating-point input.
statement ok
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.797693134862315708145274237317043567981e+308, 0, 0, 0, 1.797693134862315708145274237317043567981e+308, 0)

query FF
SELECT corr(y, x), corr(int_y, int_x) FROM statistics_agg_test
----
-0.443213217542505  0.917580124387801

statement ok
TRUNCATE statistics_agg_test

# Two points where y increases with x: correlation is exactly 1.
statement ok
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, 10.0, 1, 10, 1.0, 10.0),
  (2.0, 20.0, 2, 20, 2.0, 20.0)

query RRR
SELECT corr(y, x), corr(int_y, int_x), corr(dy, dx)
FROM statistics_agg_test
----
1 1 1

statement ok
TRUNCATE statistics_agg_test

# Two points where y increases as x decreases: correlation is exactly -1.
statement ok
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,  10.0, 1,  10, 1.0,  10.0),
  (2.0, -20.0, 2, -20, 2.0, -20.0)

query RRR
SELECT corr(y, x), corr(int_y, int_x), corr(dy, dx)
FROM statistics_agg_test
----
-1 -1 -1

statement ok
TRUNCATE statistics_agg_test

# y is constant across both points, so the correlation is reported as NULL.
statement ok
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, -1.0, 1, -1, 1.0, -1.0),
  (1.0,  1.0, 1,  1, 1.0,  1.0)

query RRR
SELECT corr(y, x), corr(int_y, int_x), corr(dy, dx)
FROM statistics_agg_test
----
NULL NULL NULL

statement ok
TRUNCATE statistics_agg_test

subtest covar_pop

# Sample data for the covar_pop tests: ten samples with repeated pairs plus one
# all-NULL row. The FLOAT and DECIMAL columns hold the same values; the INT
# columns hold a separate integer sample.
statement ok
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx)
VALUES (0.0,   0.09561,    1,   10,   0.0, 0.09561),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (NULL,     NULL, NULL, NULL,  NULL,    NULL);

query FFFFF
SELECT covar_pop(y, x), covar_pop(int_y, int_x), covar_pop(y, int_x), covar_pop(int_y, x), round(covar_pop(dy, dx), 7)
FROM statistics_agg_test
----
-149.7003372 33 1100.4 -25.336322 -149.7003372

query FFFF
SELECT covar_pop(y, dx), covar_pop(int_y, dx), covar_pop(dy, int_x), covar_pop(dy, x)
FROM statistics_agg_test
----
-149.7003372 -25.336322 1100.4 -149.7003372

query F
SELECT covar_pop(DISTINCT y, x)
FROM statistics_agg_test
----
653.62895125

query F
SELECT CAST(covar_pop(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 100) AS decimal)
FROM statistics_agg_test
----
-1109.4299999999998

query error pq: unknown signature: covar_pop\(string, string\)
SELECT covar_pop(y::string, x::string) FROM statistics_agg_test

# Add a row whose y is roughly 1.8e308; covar_pop over the float columns is
# then expected to fail with an out-of-range error.
statement ok
INSERT INTO statistics_agg_test (y, x, int_y, int_x) VALUES
  (1.797693134862315708145274237317043567981e+308, 0, 0, 0)

query error float out of range
SELECT covar_pop(y, x), covar_pop(int_y, int_x) FROM statistics_agg_test

statement ok
TRUNCATE statistics_agg_test

# Two points where y increases with x: positive covariance.
statement ok
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, 10.0, 1, 10, 1.0, 10.0),
  (2.0, 20.0, 2, 20, 2.0, 20.0)

query RRR
SELECT covar_pop(y, x), covar_pop(int_y, int_x), covar_pop(dy, dx)
FROM statistics_agg_test
----
2.5 2.5 2.5

statement ok
TRUNCATE statistics_agg_test

# Two points where y increases as x decreases: negative covariance.
statement ok
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,  10.0, 1,  10, 1.0,  10.0),
  (2.0, -20.0, 2, -20, 2.0, -20.0)

query RRR
SELECT covar_pop(y, x), covar_pop(int_y, int_x), covar_pop(dy, dx)
FROM statistics_agg_test
----
-7.5 -7.5 -7.5

statement ok
TRUNCATE statistics_agg_test

# y is constant across both points: covariance is 0.
statement ok
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, -1.0, 1, -1, 1.0, -1.0),
  (1.0,  1.0, 1,  1, 1.0,  1.0)

query RRR
SELECT covar_pop(y, x), covar_pop(int_y, int_x), covar_pop(dy, dx)
FROM statistics_agg_test
----
0 0 0

statement ok
TRUNCATE statistics_agg_test

subtest covar_samp

# Load ten non-NULL rows (several of them repeated) plus one all-NULL row.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx)
VALUES (0.0,   0.09561,    1,   10,   0.0, 0.09561),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (NULL,     NULL, NULL, NULL,  NULL,    NULL);

# covar_samp over several column pairings; the (dy, dx) result is rounded to 6
# places before comparison.
query FFFFF
SELECT covar_samp(y, x), covar_samp(int_y, int_x), covar_samp(y, int_x), covar_samp(int_y, x), round(covar_samp(dy, dx), 6)
FROM statistics_agg_test
----
-166.333708  36.666666666666664  1222.6666666666667  -28.15146888888889  -166.333708

# Pairings that mix the dy/dx columns with the other columns.
query FFFF
SELECT covar_samp(y, dx), covar_samp(int_y, dx), covar_samp(dy, int_x), covar_samp(dy, x)
FROM statistics_agg_test
----
-166.333708 -28.1514688888889 1222.66666666667 -166.333708

# DISTINCT feeds each repeated (y, x) pair to the aggregate only once.
query F
SELECT covar_samp(DISTINCT y, x)
FROM statistics_agg_test
----
871.505268333333

# DISTINCT combined with FILTER: only rows with x > 3 AND y < 100 contribute.
query F
SELECT CAST(covar_samp(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 100) AS decimal)
FROM statistics_agg_test
----
-2218.8599999999997

# STRING arguments have no matching covar_samp signature.
query error pq: unknown signature: covar_samp\(string, string\)
SELECT covar_samp(y::string, x::string) FROM statistics_agg_test

# Add a row whose y is roughly 1.8e308; the query below expects the aggregate
# to fail with "float out of range".
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x) VALUES
  (1.797693134862315708145274237317043567981e+308, 0, 0, 0)

query error float out of range
SELECT covar_samp(y, x), covar_samp(int_y, int_x) FROM statistics_agg_test

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y grows as x grows.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, 10.0, 1, 10, 1.0, 10.0),
  (2.0, 20.0, 2, 20, 2.0, 20.0)

query RRR
SELECT covar_samp(y, x), covar_samp(int_y, int_x), covar_samp(dy, dx)
FROM statistics_agg_test
----
5 5 5

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y grows as x shrinks.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,  10.0, 1,  10, 1.0,  10.0),
  (2.0, -20.0, 2, -20, 2.0, -20.0)

query RRR
SELECT covar_samp(y, x), covar_samp(int_y, int_x), covar_samp(dy, dx)
FROM statistics_agg_test
----
-15 -15 -15

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y is constant while x varies.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, -1.0, 1, -1, 1.0, -1.0),
  (1.0,  1.0, 1,  1, 1.0,  1.0)

query RRR
SELECT covar_samp(y, x), covar_samp(int_y, int_x), covar_samp(dy, dx)
FROM statistics_agg_test
----
0 0 0

# Leave the table empty for whatever runs next.
statement OK
TRUNCATE statistics_agg_test

subtest regr_intercept

# Load ten non-NULL rows (several of them repeated) plus one all-NULL row.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx)
VALUES (0.0,   0.09561,    1,   10,   0.0, 0.09561),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (NULL,     NULL, NULL, NULL,  NULL,    NULL);

# regr_intercept over several column pairings.
query FFFFF
SELECT regr_intercept(y, x), regr_intercept(int_y, int_x), regr_intercept(y, int_x), regr_intercept(int_y, x), regr_intercept(dy, dx)
FROM statistics_agg_test
----
66.3766879252252 1.47002398081535 14.1822541966427 3.19915081505252 66.3766879252252

# Pairings that mix the dy/dx columns with the other columns.
query FFFF
SELECT regr_intercept(y, dx), regr_intercept(int_y, dx), regr_intercept(dy, int_x), regr_intercept(dy, x)
FROM statistics_agg_test
----
66.3766879252252 3.19915081505252 14.1822541966427 66.3766879252252

# DISTINCT feeds each repeated (y, x) pair to the aggregate only once.
query F
SELECT regr_intercept(DISTINCT y, x)
FROM statistics_agg_test
----
45.395366359744

# DISTINCT combined with FILTER: only rows with x > 3 AND y < 100 contribute.
query F
SELECT CAST(regr_intercept(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 100) AS decimal)
FROM statistics_agg_test
----
56.34450123036154

# STRING arguments have no matching regr_intercept signature.
query error pq: unknown signature: regr_intercept\(string, string\)
SELECT regr_intercept(y::string, x::string) FROM statistics_agg_test

# Add a row whose y is roughly 1.8e308. Unlike the covariance subtests, the
# query below expects finite results rather than an error.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x) VALUES
  (1.797693134862315708145274237317043567981e+308, 0, 0, 0)

query FF
SELECT regr_intercept(y, x), regr_intercept(int_y, int_x) FROM statistics_agg_test
----
2.79449911391682e+307  1.07386861313869

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y grows as x grows (the fitted line passes through the origin).
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, 10.0, 1, 10, 1.0, 10.0),
  (2.0, 20.0, 2, 20, 2.0, 20.0)

query RRR
SELECT regr_intercept(y, x), regr_intercept(int_y, int_x), regr_intercept(dy, dx)
FROM statistics_agg_test
----
0 0 0

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y grows as x shrinks.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,  10.0, 1,  10, 1.0,  10.0),
  (2.0, -20.0, 2, -20, 2.0, -20.0)

query RRR
SELECT regr_intercept(y, x), regr_intercept(int_y, int_x), regr_intercept(dy, dx)
FROM statistics_agg_test
----
1.3333333333333333  1.3333333333333333  1.3333333333333333

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y is constant while x varies.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, -1.0, 1, -1, 1.0, -1.0),
  (1.0,  1.0, 1,  1, 1.0,  1.0)

query RRR
SELECT regr_intercept(y, x), regr_intercept(int_y, int_x), regr_intercept(dy, dx)
FROM statistics_agg_test
----
1 1 1

# Leave the table empty for whatever runs next.
statement OK
TRUNCATE statistics_agg_test

subtest regr_r2

# Load ten non-NULL rows (several of them repeated) plus one all-NULL row.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx)
VALUES (0.0,   0.09561,    1,   10,   0.0, 0.09561),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (NULL,     NULL, NULL, NULL,  NULL,    NULL);

# regr_r2 over several column pairings; the (int_y, x) result is rounded to 15
# places before comparison.
query FFFFF
SELECT regr_r2(y, x), regr_r2(int_y, int_x), regr_r2(y, int_x), round(regr_r2(int_y, x), 15), regr_r2(dy, dx)
FROM statistics_agg_test
----
0.001579883732409151  0.8705035971223022  0.9284163829157668  0.047181068076447  0.001579883732409151

# Pairings that mix the dy/dx columns with the other columns. The second
# expression uses (int_y, dx), matching the equivalent query in the sibling
# subtests, so that this pairing is actually exercised; the dx values mirror x,
# so the expected value equals the (int_y, x) result above.
query FFFF
SELECT regr_r2(y, dx), round(regr_r2(int_y, dx), 15), regr_r2(dy, int_x), regr_r2(dy, x)
FROM statistics_agg_test
----
0.001579883732409151  0.047181068076447  0.9284163829157668  0.001579883732409151

# DISTINCT feeds each repeated (y, x) pair to the aggregate only once.
query F
SELECT regr_r2(DISTINCT y, x)
FROM statistics_agg_test
----
0.0194977977947556

# DISTINCT combined with FILTER: only rows with x > 3 AND y < 100 contribute.
query F
SELECT CAST(regr_r2(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 100) AS decimal)
FROM statistics_agg_test
----
1

# STRING arguments have no matching regr_r2 signature.
query error pq: unknown signature: regr_r2\(string, string\)
SELECT regr_r2(y::string, x::string) FROM statistics_agg_test

# Add a row whose y is roughly 1.8e308. The query below expects finite results
# rather than an error.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x) VALUES
  (1.797693134862315708145274237317043567981e+308, 0, 0, 0)

query FF
SELECT regr_r2(y, x), regr_r2(int_y, int_x) FROM statistics_agg_test
----
0.070994090464941  0.841953284671533

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y grows as x grows.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, 10.0, 1, 10, 1.0, 10.0),
  (2.0, 20.0, 2, 20, 2.0, 20.0)

query RRR
SELECT regr_r2(y, x), regr_r2(int_y, int_x), regr_r2(dy, dx)
FROM statistics_agg_test
----
1 1 1

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y grows as x shrinks.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,  10.0, 1,  10, 1.0,  10.0),
  (2.0, -20.0, 2, -20, 2.0, -20.0)

query RRR
SELECT regr_r2(y, x), regr_r2(int_y, int_x), regr_r2(dy, dx)
FROM statistics_agg_test
----
1 1 1

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y is constant while x varies; the expected result is still 1.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, -1.0, 1, -1, 1.0, -1.0),
  (1.0,  1.0, 1,  1, 1.0,  1.0)

query RRR
SELECT regr_r2(y, x), regr_r2(int_y, int_x), regr_r2(dy, dx)
FROM statistics_agg_test
----
1 1 1

# Leave the table empty for whatever runs next.
statement OK
TRUNCATE statistics_agg_test

subtest regr_slope

# Load ten non-NULL rows (several of them repeated) plus one all-NULL row.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx)
VALUES (0.0,   0.09561,    1,   10,   0.0, 0.09561),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (NULL,     NULL, NULL, NULL,  NULL,    NULL);

# regr_slope over several column pairings.
query FFFFF
SELECT regr_slope(y, x), regr_slope(int_y, int_x), regr_slope(y, int_x), regr_slope(int_y, x), regr_slope(dy, dx)
FROM statistics_agg_test
----
-0.0110028047689694 0.026378896882494 0.879616306954436 -0.00186219089244472 -0.0110028047689694

# Pairings that mix the dy/dx columns with the other columns.
query FFFF
SELECT regr_slope(y, dx), regr_slope(int_y, dx), regr_slope(dy, int_x), regr_slope(dy, x)
FROM statistics_agg_test
----
-0.0110028047689694 -0.00186219089244472 0.879616306954436 -0.0110028047689694

# DISTINCT feeds each repeated (y, x) pair to the aggregate only once.
query F
SELECT regr_slope(DISTINCT y, x)
FROM statistics_agg_test
----
0.0380258825612487

# DISTINCT combined with FILTER: only rows with x > 3 AND y < 100 contribute.
query F
SELECT CAST(regr_slope(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 100) AS decimal)
FROM statistics_agg_test
----
-0.04416682440532526

# STRING arguments have no matching regr_slope signature.
query error pq: unknown signature: regr_slope\(string, string\)
SELECT regr_slope(y::string, x::string) FROM statistics_agg_test

# Add a row whose y is roughly 1.8e308. The query below expects finite results
# rather than an error.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x) VALUES
  (1.797693134862315708145274237317043567981e+308, 0, 0, 0)

query FF
SELECT regr_slope(y, x), regr_slope(int_y, int_x) FROM statistics_agg_test
----
-1.19338306247506e+305  0.0313576642335766

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y grows as x grows.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, 10.0, 1, 10, 1.0, 10.0),
  (2.0, 20.0, 2, 20, 2.0, 20.0)

query RRR
SELECT regr_slope(y, x), regr_slope(int_y, int_x), regr_slope(dy, dx)
FROM statistics_agg_test
----
0.1 0.1 0.1

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y grows as x shrinks.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,  10.0, 1,  10, 1.0,  10.0),
  (2.0, -20.0, 2, -20, 2.0, -20.0)

query RRR
SELECT regr_slope(y, x), regr_slope(int_y, int_x), regr_slope(dy, dx)
FROM statistics_agg_test
----
-0.03333333333333333  -0.03333333333333333  -0.03333333333333333

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y is constant while x varies.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, -1.0, 1, -1, 1.0, -1.0),
  (1.0,  1.0, 1,  1, 1.0,  1.0)

query RRR
SELECT regr_slope(y, x), regr_slope(int_y, int_x), regr_slope(dy, dx)
FROM statistics_agg_test
----
0 0 0

# Leave the table empty for whatever runs next.
statement OK
TRUNCATE statistics_agg_test

subtest regr_sxx

# Load ten non-NULL rows (several of them repeated) plus one all-NULL row.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx)
VALUES (0.0,   0.09561,    1,   10,   0.0, 0.09561),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (NULL,     NULL, NULL, NULL,  NULL,    NULL);

# regr_sxx over several column pairings; the expected value follows the second
# (x) argument regardless of which y column is paired with it.
query FFFFF
SELECT regr_sxx(y, x), regr_sxx(int_y, int_x), regr_sxx(y, int_x), regr_sxx(int_y, x), regr_sxx(dy, dx)
FROM statistics_agg_test
----
136056.52408028886 12510 12510 136056.52408028886 136056.52408028886

# Pairings that mix the dy/dx columns with the other columns.
query FFFF
SELECT regr_sxx(y, dx), regr_sxx(int_y, dx), regr_sxx(dy, int_x), regr_sxx(dy, x)
FROM statistics_agg_test
----
136056.52408028886 136056.52408028886 12510 136056.52408028886

# DISTINCT feeds each repeated (y, x) pair to the aggregate only once.
query F
SELECT regr_sxx(DISTINCT y, x)
FROM statistics_agg_test
----
68756.21626371906

# DISTINCT combined with FILTER: only rows with x > 3 AND y < 100 contribute.
query F
SELECT CAST(regr_sxx(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 100) AS decimal)
FROM statistics_agg_test
----
50238.1602

# STRING arguments have no matching regr_sxx signature.
query error pq: unknown signature: regr_sxx\(string, string\)
SELECT regr_sxx(y::string, x::string) FROM statistics_agg_test

# Add a row whose x (not y, as in the other subtests) is roughly 1.8e308; the
# query below expects the aggregate to fail with "float out of range".
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x) VALUES
  (0, 1.797693134862315708145274237317043567981e+308, 0, 0)

query error float out of range
SELECT regr_sxx(y, x), regr_sxx(int_y, int_x) FROM statistics_agg_test

statement OK
TRUNCATE statistics_agg_test

# Two-row case: x values 10 and 20.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, 10.0, 1, 10, 1.0, 10.0),
  (2.0, 20.0, 2, 20, 2.0, 20.0)

query RRR
SELECT regr_sxx(y, x), regr_sxx(int_y, int_x), regr_sxx(dy, dx)
FROM statistics_agg_test
----
50 50 50

statement OK
TRUNCATE statistics_agg_test

# Two-row case: x values 10 and -20.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,  10.0, 1,  10, 1.0,  10.0),
  (2.0, -20.0, 2, -20, 2.0, -20.0)

query RRR
SELECT regr_sxx(y, x), regr_sxx(int_y, int_x), regr_sxx(dy, dx)
FROM statistics_agg_test
----
450 450 450

statement OK
TRUNCATE statistics_agg_test

# Two-row case: x values -1 and 1 with constant y.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, -1.0, 1, -1, 1.0, -1.0),
  (1.0,  1.0, 1,  1, 1.0,  1.0)

query RRR
SELECT regr_sxx(y, x), regr_sxx(int_y, int_x), regr_sxx(dy, dx)
FROM statistics_agg_test
----
2 2 2

# Leave the table empty for whatever runs next.
statement OK
TRUNCATE statistics_agg_test

subtest regr_sxy

# Load ten non-NULL rows (several of them repeated) plus one all-NULL row.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx)
VALUES (0.0,   0.09561,    1,   10,   0.0, 0.09561),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (NULL,     NULL, NULL, NULL,  NULL,    NULL);

# regr_sxy over several column pairings.
query FFFFF
SELECT regr_sxy(y, x), regr_sxy(int_y, int_x), regr_sxy(y, int_x), regr_sxy(int_y, x), regr_sxy(dy, dx)
FROM statistics_agg_test
----
-1497.0033719999974 330 11004 -253.36321999999993 -1497.0033719999974

# Pairings that mix the dy/dx columns with the other columns.
query FFFF
SELECT regr_sxy(y, dx), regr_sxy(int_y, dx), regr_sxy(dy, int_x), regr_sxy(dy, x)
FROM statistics_agg_test
----
-1497.0033719999974 -253.36321999999993 11004 -1497.0033719999974

# DISTINCT feeds each repeated (y, x) pair to the aggregate only once.
query F
SELECT regr_sxy(DISTINCT y, x)
FROM statistics_agg_test
----
2614.515804999999

# DISTINCT combined with FILTER: only rows with x > 3 AND y < 100 contribute.
query F
SELECT CAST(regr_sxy(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 100) AS decimal)
FROM statistics_agg_test
----
-2218.86

# STRING arguments have no matching regr_sxy signature.
query error pq: unknown signature: regr_sxy\(string, string\)
SELECT regr_sxy(y::string, x::string) FROM statistics_agg_test

# Add a row whose y is roughly 1.8e308; the query below expects the aggregate
# to fail with "float out of range".
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x) VALUES
  (1.797693134862315708145274237317043567981e+308, 0, 0, 0)

query error float out of range
SELECT regr_sxy(y, x), regr_sxy(int_y, int_x) FROM statistics_agg_test

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y grows as x grows.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, 10.0, 1, 10, 1.0, 10.0),
  (2.0, 20.0, 2, 20, 2.0, 20.0)

query RRR
SELECT regr_sxy(y, x), regr_sxy(int_y, int_x), regr_sxy(dy, dx)
FROM statistics_agg_test
----
5 5 5

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y grows as x shrinks.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,  10.0, 1,  10, 1.0,  10.0),
  (2.0, -20.0, 2, -20, 2.0, -20.0)

query RRR
SELECT regr_sxy(y, x), regr_sxy(int_y, int_x), regr_sxy(dy, dx)
FROM statistics_agg_test
----
-15 -15 -15

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y is constant while x varies.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, -1.0, 1, -1, 1.0, -1.0),
  (1.0,  1.0, 1,  1, 1.0,  1.0)

query RRR
SELECT regr_sxy(y, x), regr_sxy(int_y, int_x), regr_sxy(dy, dx)
FROM statistics_agg_test
----
0 0 0

# Leave the table empty for whatever runs next.
statement OK
TRUNCATE statistics_agg_test

subtest regr_syy

# Load ten non-NULL rows (several of them repeated) plus one all-NULL row.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx)
VALUES (0.0,   0.09561,    1,   10,   0.0, 0.09561),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (NULL,     NULL, NULL, NULL,  NULL,    NULL);

# regr_syy over several column pairings; the expected value follows the first
# (y) argument regardless of which x column is paired with it.
query FFFFF
SELECT regr_syy(y, x), regr_syy(int_y, int_x), regr_syy(y, int_x), regr_syy(int_y, x), regr_syy(dy, dx)
FROM statistics_agg_test
----
10425.6 10 10425.6 10 10425.6

# Pairings that mix the dy/dx columns with the other columns.
query FFFF
SELECT regr_syy(y, dx), regr_syy(int_y, dx), regr_syy(dy, int_x), regr_syy(dy, x)
FROM statistics_agg_test
----
10425.6 10 10425.6 10425.6

# DISTINCT feeds each repeated (y, x) pair to the aggregate only once.
query F
SELECT regr_syy(DISTINCT y, x)
FROM statistics_agg_test
----
5099

# DISTINCT combined with FILTER: only rows with x > 3 AND y < 100 contribute.
query F
SELECT CAST(regr_syy(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 100) AS decimal)
FROM statistics_agg_test
----
98

# STRING arguments have no matching regr_syy signature.
query error pq: unknown signature: regr_syy\(string, string\)
SELECT regr_syy(y::string, x::string) FROM statistics_agg_test

# Add a row whose y is roughly 1.8e308; the query below expects the aggregate
# to fail with "float out of range".
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x) VALUES
  (1.797693134862315708145274237317043567981e+308, 0, 0, 0)

query error float out of range
SELECT regr_syy(y, x), regr_syy(int_y, int_x) FROM statistics_agg_test

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y values 1 and 2 with x values 10 and 20.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, 10.0, 1, 10, 1.0, 10.0),
  (2.0, 20.0, 2, 20, 2.0, 20.0)

query RRR
SELECT regr_syy(y, x), regr_syy(int_y, int_x), regr_syy(dy, dx)
FROM statistics_agg_test
----
0.5 0.5 0.5

statement OK
TRUNCATE statistics_agg_test

# Two-row case: same y values with different x values; the expected result is
# unchanged.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,  10.0, 1,  10, 1.0,  10.0),
  (2.0, -20.0, 2, -20, 2.0, -20.0)

query RRR
SELECT regr_syy(y, x), regr_syy(int_y, int_x), regr_syy(dy, dx)
FROM statistics_agg_test
----
0.5 0.5 0.5

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y is constant while x varies.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, -1.0, 1, -1, 1.0, -1.0),
  (1.0,  1.0, 1,  1, 1.0,  1.0)

query RRR
SELECT regr_syy(y, x), regr_syy(int_y, int_x), regr_syy(dy, dx)
FROM statistics_agg_test
----
0 0 0

# Leave the table empty for whatever runs next.
statement OK
TRUNCATE statistics_agg_test

subtest regr_count

# Load ten non-NULL rows (several of them repeated) plus one all-NULL row.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx)
VALUES (0.0,   0.09561,    1,   10,   0.0, 0.09561),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (NULL,     NULL, NULL, NULL,  NULL,    NULL);

# Eleven rows were inserted but the all-NULL row is not counted, so every
# pairing is expected to report 10.
query IIIII
SELECT regr_count(y, x), regr_count(int_y, int_x), regr_count(y, int_x), regr_count(int_y, x), regr_count(dy, dx)
FROM statistics_agg_test
----
10 10 10 10 10

# Pairings that mix the dy/dx columns with the other columns.
query IIII
SELECT regr_count(y, dx), regr_count(int_y, dx), regr_count(dy, int_x), regr_count(dy, x)
FROM statistics_agg_test
----
10 10 10 10

# DISTINCT counts each repeated (y, x) pair once: four distinct pairs.
query I
SELECT regr_count(DISTINCT y, x)
FROM statistics_agg_test
----
4

# DISTINCT combined with FILTER: only rows with x > 3 AND y < 100 contribute.
query I
SELECT regr_count(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 100)
FROM statistics_agg_test
----
2

# STRING arguments have no matching regr_count signature.
query error pq: unknown signature: regr_count\(string, string\)
SELECT regr_count(y::string, x::string) FROM statistics_agg_test

# Add a row with a very large y (roughly 1.8e308). The row supplies no dy/dx
# values, so regr_count(dy, dx) is expected to stay at 10 while the other two
# counts rise to 11.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x) VALUES
  (1.797693134862315708145274237317043567981e+308, 0, 0, 0)

query III
SELECT regr_count(y, x), regr_count(int_y, int_x), regr_count(dy, dx) FROM statistics_agg_test
----
11 11 10

statement OK
TRUNCATE statistics_agg_test

# On an empty table regr_count returns 0 rather than NULL.
query I
SELECT regr_count(y, x) FROM statistics_agg_test
----
0

# A table holding only all-NULL rows also yields 0.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (NULL, NULL, NULL, NULL, NULL, NULL),
  (NULL, NULL, NULL, NULL, NULL, NULL)

query I
SELECT regr_count(y, x)
FROM statistics_agg_test
----
0

# Leave the table empty for whatever runs next.
statement OK
TRUNCATE statistics_agg_test

subtest regr_avgx

# This subtest uses its own data set: ten non-NULL rows in which every column
# group carries the same values (y 1..4, x 10..100), plus one all-NULL row.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,   10.0,    1,   10, 1.0,   10.0),
  (2.0,   25.0,    2,   25, 2.0,   25.0),
  (2.0,   25.0,    2,   25, 2.0,   25.0),
  (3.0,   40.0,    3,   40, 3.0,   40.0),
  (3.0,   40.0,    3,   40, 3.0,   40.0),
  (3.0,   40.0,    3,   40, 3.0,   40.0),
  (4.0,  100.0,    4,  100, 4.0,  100.0),
  (4.0,  100.0,    4,  100, 4.0,  100.0),
  (4.0,  100.0,    4,  100, 4.0,  100.0),
  (4.0,  100.0,    4,  100, 4.0,  100.0),
  (NULL,  NULL, NULL, NULL, NULL, NULL)

# regr_avgx over several column pairings, each cast to decimal before
# comparison.
query FFFF
SELECT regr_avgx(y, x)::decimal, regr_avgx(int_y, int_x)::decimal, regr_avgx(y, int_x)::decimal,
regr_avgx(int_y, x)::decimal
FROM statistics_agg_test
----
58  58  58  58

# Pairings that mix the dy/dx columns with the other columns.
query FFFF
SELECT regr_avgx(y, dx), regr_avgx(int_y, dx), regr_avgx(dy, int_x), regr_avgx(dy, x)
FROM statistics_agg_test
----
58  58  58  58

# DISTINCT feeds each repeated (y, x) pair to the aggregate only once.
query F
SELECT regr_avgx(DISTINCT y, x)
FROM statistics_agg_test
----
43.75

# DISTINCT combined with FILTER. Every non-NULL row satisfies x > 3 AND y < 30,
# so the expected value matches the unfiltered DISTINCT result.
query F
SELECT CAST(regr_avgx(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 30) AS decimal)
FROM statistics_agg_test
----
43.75

# STRING arguments have no matching regr_avgx signature.
query error pq: unknown signature: regr_avgx\(string, string\)
SELECT regr_avgx(y::string, x::string) FROM statistics_agg_test

# Add a row with an extremely large x/dx literal (exponent e+408); the query
# below expects the aggregate to fail with "float out of range".
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (0, 1.797693134862315708145274237317043567981e+408, 0, 0, 0, 1.797693134862315708145274237317043567981e+408)

query error float out of range
SELECT regr_avgx(y, x), regr_avgx(int_y, int_x) FROM statistics_agg_test

statement OK
TRUNCATE statistics_agg_test

# Two-row case: x values 10 and 20.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, 10.0, 1, 10, 1.0, 10.0),
  (2.0, 20.0, 2, 20, 2.0, 20.0)

query RRR
SELECT regr_avgx(y, x), regr_avgx(int_y, int_x), regr_avgx(dy, dx)
FROM statistics_agg_test
----
15 15 15

statement OK
TRUNCATE statistics_agg_test

# Two-row case: x values 10 and -20.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,  10.0, 1,  10, 1.0,  10.0),
  (2.0, -20.0, 2, -20, 2.0, -20.0)

query RRR
SELECT regr_avgx(y, x), regr_avgx(int_y, int_x), regr_avgx(dy, dx)
FROM statistics_agg_test
----
-5 -5 -5

statement OK
TRUNCATE statistics_agg_test

# Two-row case: x values -1 and 1.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, -1.0, 1, -1, 1.0, -1.0),
  (1.0,  1.0, 1,  1, 1.0,  1.0)

query RRR
SELECT regr_avgx(y, x), regr_avgx(int_y, int_x), regr_avgx(dy, dx)
FROM statistics_agg_test
----
0 0 0

statement OK
TRUNCATE statistics_agg_test

# On an empty table regr_avgx returns NULL.
query R
SELECT regr_avgx(y, x) FROM statistics_agg_test
----
NULL

subtest regr_avgy

# Load ten non-NULL rows (several of them repeated) plus one all-NULL row.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx)
VALUES (0.0,   0.09561,    1,   10,   0.0, 0.09561),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (42.0,   324.78,    2,   25,  42.0,  324.78),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (56.0,      7.8,    3,   40,  56.0,     7.8),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (100.0,  99.097,    4,  100, 100.0,  99.097),
       (NULL,     NULL, NULL, NULL,  NULL,    NULL);

# regr_avgy over several column pairings; the expected value follows the first
# (y) argument regardless of which x column is paired with it.
query FFFFF
SELECT regr_avgy(y, x), regr_avgy(int_y, int_x), regr_avgy(y, int_x), regr_avgy(int_y, x), regr_avgy(dy, dx)
FROM statistics_agg_test
----
65.2  3  65.2  3  65.2

# Pairings that mix the dy/dx columns with the other columns.
query FFFF
SELECT regr_avgy(y, dx), regr_avgy(int_y, dx), regr_avgy(dy, int_x), regr_avgy(dy, x)
FROM statistics_agg_test
----
65.2  3  65.2  65.2

# DISTINCT feeds each repeated (y, x) pair to the aggregate only once.
query F
SELECT regr_avgy(DISTINCT y, x)
FROM statistics_agg_test
----
49.5

# DISTINCT combined with FILTER: only rows with x > 3 AND y < 100 contribute.
query F
SELECT CAST(regr_avgy(DISTINCT y, x) FILTER (WHERE x > 3 AND y < 100) AS decimal)
FROM statistics_agg_test
----
49

# STRING arguments have no matching regr_avgy signature.
query error pq: unknown signature: regr_avgy\(string, string\)
SELECT regr_avgy(y::string, x::string) FROM statistics_agg_test

# Add a row with an extremely large y literal (exponent e+408); the query
# below expects the aggregate to fail with "float out of range".
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x) VALUES
  (1.797693134862315708145274237317043567981e+408, 0, 0, 0)

query error float out of range
SELECT regr_avgy(y, x), regr_avgy(int_y, int_x) FROM statistics_agg_test

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y values 1 and 2 with x values 10 and 20.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, 10.0, 1, 10, 1.0, 10.0),
  (2.0, 20.0, 2, 20, 2.0, 20.0)

query FFF
SELECT regr_avgy(y, x), regr_avgy(int_y, int_x), regr_avgy(dy, dx)
FROM statistics_agg_test
----
1.5  1.5  1.5

statement OK
TRUNCATE statistics_agg_test

# Two-row case: same y values with different x values; the expected result is
# unchanged.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0,  10.0, 1,  10, 1.0,  10.0),
  (2.0, -20.0, 2, -20, 2.0, -20.0)

query FFF
SELECT regr_avgy(y, x), regr_avgy(int_y, int_x), regr_avgy(dy, dx)
FROM statistics_agg_test
----
1.5  1.5  1.5

statement OK
TRUNCATE statistics_agg_test

# Two-row case: y is constant while x varies.
statement OK
INSERT INTO statistics_agg_test (y, x, int_y, int_x, dy, dx) VALUES
  (1.0, -1.0, 1, -1, 1.0, -1.0),
  (1.0,  1.0, 1,  1, 1.0,  1.0)

query FFF
SELECT regr_avgy(y, x), regr_avgy(int_y, int_x), regr_avgy(dy, dx)
FROM statistics_agg_test
----
1  1  1

# Leave the table empty for whatever runs next.
statement OK
TRUNCATE statistics_agg_test

subtest string_agg

# Table used by the string_agg tests: employees grouped by company_id.
statement OK
CREATE TABLE string_agg_test (
  id INT PRIMARY KEY,
  company_id INT,
  employee STRING
)

# The next four queries run against the still-empty table. A grouped
# aggregation over no rows produces no groups, so only the column header is
# expected. They cover STRING and BYTES inputs with a literal and a NULL
# delimiter.
query IT colnames
SELECT company_id, string_agg(employee, ',')
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg

query IT colnames
SELECT company_id, string_agg(employee::BYTES, b',')
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg

query IT colnames
SELECT company_id, string_agg(employee, NULL)
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg

query IT colnames
SELECT company_id, string_agg(employee::BYTES, NULL)
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg

# Ten employees across four companies; within a company every employee has the
# same one-letter name (1 x A, 2 x B, 3 x C, 4 x D).
statement OK
INSERT INTO string_agg_test VALUES
  (1, 1, 'A'),
  (2, 2, 'B'),
  (3, 3, 'C'),
  (4, 4, 'D'),
  (5, 3, 'C'),
  (6, 4, 'D'),
  (7, 4, 'D'),
  (8, 4, 'D'),
  (9, 3, 'C'),
  (10, 2, 'B')

# This is a bit strange but the same behavior as PostgreSQL.
# The delimiter is the employee column itself, so n values are joined by n-1
# copies of the same letter (e.g. 2 x B -> BBB).
query IT rowsort
SELECT company_id, string_agg(employee, employee)
FROM string_agg_test
GROUP BY company_id;
----
1           A
2           BBB
3           CCCCC
4           DDDDDDD

# STRING input with a ',' delimiter.
query IT colnames
SELECT company_id, string_agg(employee, ',')
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg
1           A
2           B,B
3           C,C,C
4           D,D,D,D

# DISTINCT collapses the repeated names within each company.
query IT colnames
SELECT company_id, string_agg(DISTINCT employee, ',')
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg
1           A
2           B
3           C
4           D

# BYTES input with a BYTES ',' delimiter.
query IT colnames
SELECT company_id, string_agg(employee::BYTES, b',')
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg
1           A
2           B,B
3           C,C,C
4           D,D,D,D

# Empty-string delimiter: values are concatenated directly.
query IT colnames
SELECT company_id, string_agg(employee, '')
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg
1           A
2           BB
3           CCC
4           DDDD

# Empty BYTES delimiter.
query IT colnames
SELECT company_id, string_agg(employee::BYTES, b'')
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg
1           A
2           BB
3           CCC
4           DDDD

# NULL delimiter: expected output matches the empty-delimiter case.
query IT colnames
SELECT company_id, string_agg(employee, NULL)
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg
1           A
2           BB
3           CCC
4           DDDD

# NULL delimiter with BYTES input.
query IT colnames
SELECT company_id, string_agg(employee::BYTES, NULL)
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg
1           A
2           BB
3           CCC
4           DDDD

# The next four queries aggregate a typed NULL value: each group is expected
# to yield NULL whatever the delimiter is.
query IT colnames
SELECT company_id, string_agg(NULL::STRING, ',')
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg
1           NULL
2           NULL
3           NULL
4           NULL

query IT colnames
SELECT company_id, string_agg(NULL::BYTES, b',')
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg
1           NULL
2           NULL
3           NULL
4           NULL

query IT colnames
SELECT company_id, string_agg(NULL::STRING, NULL)
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg
1           NULL
2           NULL
3           NULL
4           NULL

query IT colnames
SELECT company_id, string_agg(NULL::BYTES, NULL)
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;
----
company_id  string_agg
1           NULL
2           NULL
3           NULL
4           NULL

# With both arguments untyped NULL the overload cannot be chosen, and the call
# is rejected as ambiguous.
query error pq: ambiguous call: string_agg\(unknown, unknown\)
SELECT company_id, string_agg(NULL, NULL)
FROM string_agg_test
GROUP BY company_id
ORDER BY company_id;

statement OK
TRUNCATE string_agg_test

statement OK
INSERT INTO string_agg_test VALUES
  (1, 1, 'A'),
  (2, 1, 'B'),
  (3, 1, 'C'),
  (4, 1, 'D')

query IT colnames
SELECT e.company_id, string_agg(e.employee, ', ')
FROM (
  SELECT employee, company_id
  FROM string_agg_test
  ORDER BY employee
  ) AS e
GROUP BY e.company_id
ORDER BY e.company_id;
----
company_id  string_agg
1           A, B, C, D

query IT colnames
SELECT e.company_id, string_agg(e.employee, b', ')
FROM (
  SELECT employee::BYTES, company_id
  FROM string_agg_test
  ORDER BY employee
  ) AS e
GROUP BY e.company_id
ORDER BY e.company_id;
----
company_id  string_agg
1           A, B, C, D

query IT colnames
SELECT e.company_id, string_agg(e.employee, ', ')
FROM (
  SELECT employee, company_id
  FROM string_agg_test
  ORDER BY employee DESC
  ) AS e
GROUP BY e.company_id
ORDER BY e.company_id;
----
company_id  string_agg
1           D, C, B, A

query IT colnames
SELECT e.company_id, string_agg(e.employee, b', ')
FROM (
  SELECT employee::BYTES, company_id
  FROM string_agg_test
  ORDER BY employee DESC
  ) AS e
GROUP BY e.company_id
ORDER BY e.company_id;
----
company_id  string_agg
1           D, C, B, A

# With a NULL delimiter the expected output shows the values concatenated with
# no separator. The first query aggregates the employee column as-is; the
# second casts it to BYTES first.
query IT colnames
SELECT e.company_id, string_agg(e.employee, NULL)
FROM (
  SELECT employee, company_id
  FROM string_agg_test
  ORDER BY employee DESC
  ) AS e
GROUP BY e.company_id
ORDER BY e.company_id;
----
company_id  string_agg
1           DCBA

query IT colnames
SELECT e.company_id, string_agg(e.employee, NULL)
FROM (
  SELECT employee::BYTES, company_id
  FROM string_agg_test
  ORDER BY employee DESC
  ) AS e
GROUP BY e.company_id
ORDER BY e.company_id;
----
company_id  string_agg
1           DCBA

# The string_agg fixture is no longer needed past this point.
statement ok
DROP TABLE string_agg_test

# Regression test for #28836.
# string_agg is invoked as a window function with a delimiter that is a NULL
# subquery result cast to BYTES; the expected result is the single input value.

query T
SELECT string_agg('foo', CAST ((SELECT NULL) AS BYTES)) OVER ();
----
foo

# Regression test for #30166.
statement error pq: array_agg\(\): set-returning functions are not allowed in aggregate
SELECT array_agg(generate_series(1, 2))

# Regression test for #31882.

statement ok
CREATE TABLE uvw (u INT, v INT, w INT, INDEX uvw(u, v, w))

statement ok
INSERT INTO uvw VALUES (1, 2, 3), (1, 2, 3), (3, 2, 1), (3, 2, 3)

query IIT rowsort
SELECT u, v, array_agg(w) AS s FROM (SELECT * FROM uvw ORDER BY w) GROUP BY u, v
----
3  2  {1,3}
1  2  {3,3}

# Regression test for #36433: don't panic with count_agg if a post-render produces an error.

query error lpad
SELECT count(*)::TEXT||lpad('foo', 23984729388383834723984) FROM (VALUES(1));

statement ok
CREATE TABLE tab (
  col1 INT PRIMARY KEY,
  col2 INT,
  col3 STRING,
  arr  INT[]
)

# Ordered aggregations when there are no rows: array_agg over the still-empty
# table returns NULL. The result column is declared as T because array_agg
# produces an array, matching the other array_agg records in this file.
query T
SELECT array_agg(col1 ORDER BY col2) FROM tab
----
NULL

statement ok
INSERT INTO tab VALUES (-3, 7, 'a', '{-3, 7}'), (-2, 6, 'a', '{-2, 6}'), (-1, 5, 'a', '{-1, 5}'),
  (0, 7, 'b', '{0, 7}'), (1, 5, 'b', '{1, 5}'), (2, 6, 'b', '{2, 6}')

query T colnames
SELECT array_agg(col1 ORDER BY col1) FROM tab
----
array_agg
{-3,-2,-1,0,1,2}

query T colnames
SELECT array_cat_agg(arr ORDER BY col1) FROM tab
----
array_cat_agg
{-3,7,-2,6,-1,5,0,7,1,5,2,6}

query T colnames
SELECT array_agg(col1 ORDER BY col2*100+col1) FROM tab
----
array_agg
{-1,1,-2,2,-3,0}

query T colnames
SELECT array_cat_agg(arr ORDER BY col2*100+col1) FROM tab
----
array_cat_agg
{-1,5,1,5,-2,6,2,6,-3,7,0,7}

query T colnames
SELECT json_agg(col1 ORDER BY col1) FROM tab
----
json_agg
[-3, -2, -1, 0, 1, 2]

query T colnames
SELECT jsonb_agg(col1 ORDER BY col1) FROM tab
----
jsonb_agg
[-3, -2, -1, 0, 1, 2]

query T colnames
SELECT jsonb_agg(col1 ORDER BY col2, col1) FROM tab
----
jsonb_agg
[-1, 1, -2, 2, -3, 0]

query T colnames
SELECT concat_agg(col3 ORDER BY col1) FROM tab
----
concat_agg
aaabbb

query T colnames
SELECT concat_agg(col3 ORDER BY col1 DESC) FROM tab
----
concat_agg
bbbaaa

query T colnames
SELECT string_agg(col3, ', ' ORDER BY col3) FROM tab
----
string_agg
a, a, a, b, b, b

query T colnames
SELECT string_agg(col3, ', ' ORDER BY col3 DESC) FROM tab
----
string_agg
b, b, b, a, a, a

query TTT colnames
SELECT array_agg(col1 ORDER BY col1), array_agg(col1 ORDER BY col2, col1), array_agg(col1 ORDER BY col3, col1) FROM tab
----
array_agg         array_agg         array_agg
{-3,-2,-1,0,1,2}  {-1,1,-2,2,-3,0}  {-3,-2,-1,0,1,2}

query TTT colnames
SELECT array_agg(col1 ORDER BY col1), array_agg(col1 ORDER BY col2, col1), col3 FROM tab GROUP BY col3 ORDER BY col3
----
array_agg   array_agg   col3
{-3,-2,-1}  {-1,-2,-3}  a
{0,1,2}     {1,2,0}     b

query TTII colnames
SELECT array_agg(col1 ORDER BY col1), array_agg(col1 ORDER BY col2, col1), count(col3), count(*) FROM tab
----
array_agg         array_agg         count  count
{-3,-2,-1,0,1,2}  {-1,1,-2,2,-3,0}  6      6

query TT colnames
SELECT array_agg(col1 ORDER BY col1), array_agg(col1 ORDER BY col1) FILTER (WHERE col1 < 0) FROM tab
----
array_agg         array_agg
{-3,-2,-1,0,1,2}  {-3,-2,-1}

query TT colnames
SELECT array_agg(col1 ORDER BY col3, col1) FILTER (WHERE col1 < 0), array_agg(col1 ORDER BY col3, col1) FROM tab
----
array_agg   array_agg
{-3,-2,-1}  {-3,-2,-1,0,1,2}

query IT
SELECT count(1), concat_agg(col3 ORDER BY col1) FROM tab
----
6  aaabbb

# Testing pre-projections: the GROUP BY clause groups on a projected
# expression (upper(col3)) rather than on a plain column.
query IIIT colnames
SELECT *
FROM (
  SELECT
    count(1) AS count_1,
    count(lower(col3)) AS count_lower,
    count(upper(col3)) AS count_upper,
    concat_agg(col3 ORDER BY col1) AS concat
  FROM tab
  GROUP BY upper(col3)
)
ORDER BY concat
----
count_1  count_lower  count_upper  concat
3        3            3            aaa
3        3            3            bbb

# Tests for selecting any columns when grouping by the PK.
statement ok
DELETE FROM ab WHERE true;
INSERT INTO ab VALUES (1,1), (2,1), (3,3), (4, 7)

query I rowsort
SELECT b FROM ab GROUP BY a
----
1
1
3
7

query II rowsort
SELECT a+b, count(*) FROM ab JOIN tab ON b=col2 GROUP BY a
----
11 2

query IIII rowsort
SELECT a, col1, b+col2, count(*) FROM ab JOIN tab ON b=col2 GROUP BY a, col1
----
4  -3  14  1
4  0   14  1

query IIII rowsort
SELECT a, b, count(*), count(col2) FROM ab LEFT JOIN tab ON b=col2 GROUP BY a
----
1  1  1  0
2  1  1  0
3  3  1  0
4  7  2  2

query III rowsort
SELECT a, b, count(*) FROM ab RIGHT JOIN tab ON b=col2 GROUP BY a
----
NULL  NULL  4
4     7     2

# Additional tests for MIN/MAX aggregates with indexes.
statement ok
CREATE TABLE xyz (
  x INT PRIMARY KEY,
  y INT,
  z INT,
  INDEX yz (y, z)
)

statement ok
INSERT INTO xyz VALUES (1, 2, 3), (2, 2, 7), (3, 2, 1), (4, 2, NULL), (5, 3, -1)

query I
SELECT min(z) FROM xyz WHERE y = 2 GROUP BY y
----
1

query I
SELECT min(z) FROM xyz WHERE y = 2 AND z IS NOT NULL GROUP BY y
----
1

query I
SELECT min(z) FROM xyz WHERE y = 2 AND z IS NULL GROUP BY y
----
NULL

query I
SELECT min(z) FROM xyz WHERE y = 100 AND z IS NULL GROUP BY y
----

query I
SELECT max(z) FROM xyz WHERE y = 2 GROUP BY y
----
7

query I
SELECT max(z) FROM xyz WHERE y = 2 AND z IS NOT NULL GROUP BY y
----
7

query I
SELECT max(z) FROM xyz WHERE y = 2 AND z IS NULL GROUP BY y
----
NULL

query I
SELECT max(z) FROM xyz WHERE y = 100 GROUP BY y
----

statement ok
DROP TABLE xyz

# Regression test for #44469 (DistinctOn needs to remap the provided ordering).
statement ok
CREATE TABLE t44469_a (a INT, INDEX (a))

statement ok
CREATE TABLE t44469_b (b INT, INDEX (b))

statement ok
CREATE TABLE t44469_cd (c INT, d INT, INDEX (c, d));

statement ok
SELECT DISTINCT ON (b) b
FROM t44469_a INNER LOOKUP JOIN t44469_b ON a = b INNER LOOKUP JOIN t44469_cd ON c = 1 AND d = a
ORDER BY b

statement ok
DROP TABLE IF EXISTS t;
CREATE TABLE t (x JSONB, y INT);
INSERT INTO t VALUES
  ('{"foo": "bar"}', 5),
  ('{"foo": "bar"}', 10),
  ('[1, 2]', 5),
  ('[1, 2]', 20),
  ('{"foo": "bar", "bar": "baz"}', 5),
  ('{"foo": "bar", "bar": "baz"}', 30),
  ('{"foo": {"bar" : "baz"}}', 5),
  ('{"foo": {"bar" : "baz"}}', 40)

query TT
SELECT x, SUM (y) FROM t GROUP BY (x) ORDER BY SUM (y)
----
{"foo": "bar"}                15
[1, 2]                        25
{"bar": "baz", "foo": "bar"}  35
{"foo": {"bar": "baz"}}       45

# Tests for the 'every' aggregate function.
subtest every

statement ok
CREATE TABLE t_every (x BOOL)

# No rows: the expected result is NULL.
query B
SELECT every (x) FROM t_every
----
NULL

statement ok
INSERT INTO t_every VALUES (true), (true)

# All rows are true.
query B
SELECT every (x) FROM t_every
----
true

statement ok
INSERT INTO t_every VALUES (NULL), (true)

# A NULL alongside true values leaves the expected result true.
query B
SELECT every (x) FROM t_every
----
true

statement ok
INSERT INTO t_every VALUES (false), (NULL)

# Once a false value is present the expected result is false.
query B
SELECT every (x) FROM t_every
----
false

statement ok
TRUNCATE t_every;
INSERT INTO t_every VALUES (false)

# The table now holds a single false row.
query B
SELECT every (x) FROM t_every
----
false

statement ok
TRUNCATE t_every;

statement ok
INSERT INTO t_every VALUES (NULL), (NULL), (NULL)

# Only NULL inputs: the expected result is NULL.
query B
SELECT every (x) FROM t_every
----
NULL

# Regression test for #46423: this query should return no rows.
statement ok
CREATE TABLE t46423(c0 INT);
INSERT INTO t46423(c0) VALUES(0)

# c0 is an INT column, so the result column is declared as I. The expected
# output is empty because the HAVING clause filters out the only group.
query I
SELECT c0 FROM t46423 GROUP BY c0 HAVING NOT (variance(0) IS NULL);
----

# Regression test for #45453 - make sure that we don't incorrectly treat the
# aggregation as scalar.
statement ok
CREATE TABLE t45453(c INT)

query I
SELECT count(*) FROM t45453 GROUP BY 0 + 0
----

# Tests for the bit_and and bit_or aggregate functions.

subtest bit_aggregates

statement ok
DROP TABLE IF EXISTS vals

statement ok
CREATE TABLE vals (
  v VARBIT,
  b BIT(8)
)

# Testing that bit aggregate functions return NULL if there are no rows.

query T
SELECT bit_and(v) FROM vals
----
NULL

query T
SELECT bit_or(v) FROM vals
----
NULL

# Testing that bit aggregate functions do not trigger aggregation on a constant
# with a source that has no rows.

query T
SELECT bit_and('1000'::varbit) FROM vals
----
NULL

query T
SELECT bit_or('1000'::varbit) FROM vals
----
NULL

# Testing that bit aggregate functions trigger aggregation and computation on a
# constant with no source.

query TTT
SELECT bit_and('1'::varbit), bit_and('1000'::bit(4)), bit_and('1010'::varbit)
----
1 1000 1010

query TTT
SELECT bit_or('1'::varbit), bit_or('1000'::bit(4)), bit_or('1010'::varbit)
----
1 1000 1010

# Testing that bit aggregate functions return null given a null.

query T
SELECT bit_and(NULL::varbit)
----
NULL

query T
SELECT bit_or(NULL::varbit)
----
NULL

# Testing successful bitwise aggregation over a sequence of non-nulls.

statement ok
INSERT INTO vals VALUES
('11111110'::varbit, '11111110'::bit(8)),
('01111111'::varbit, '01111110'::bit(8)),
('10111111'::varbit, '10111110'::bit(8)),
('11011111'::varbit, '11011110'::bit(8)),
('11101111'::varbit, '11101110'::bit(8))

query TT
SELECT bit_and(v), bit_and(b) FROM vals
----
00001110 00001110

query TT
SELECT bit_or(v), bit_or(b) FROM vals
----
11111111 11111110

# Testing bit aggregate functions over a sequence with nulls and non-nulls.

statement ok
INSERT INTO vals VALUES
(NULL::varbit, NULL::bit),
(NULL::varbit, NULL::bit)

query TT
SELECT bit_and(v), bit_and(b) FROM vals
----
00001110 00001110

query TT
SELECT bit_or(v), bit_or(b) FROM vals
----
11111111 11111110

# Testing bit aggregate functions over a sequence with all nulls.

statement ok
DELETE FROM vals

# Only column v is populated (explicitly named here); b has no declared default
# and is not read by the queries that follow.
statement ok
INSERT INTO vals (v) VALUES
(NULL::varbit),
(NULL::varbit),
(NULL::varbit),
(NULL::varbit)

query T
SELECT bit_and(v) FROM vals
----
NULL

query T
SELECT bit_or(v) FROM vals
----
NULL

# Testing that bit aggregate functions return an error when given an uncasted null.

statement error ambiguous call: bit_and\(unknown\), candidates are
SELECT bit_and(NULL)

statement error ambiguous call: bit_or\(unknown\), candidates are
SELECT bit_or(NULL)

# Testing that an error is returned when bit aggregate functions are called on bit
# arrays of different sizes.

statement error cannot AND bit strings of different sizes
SELECT bit_and(x::varbit) FROM (VALUES ('1'), ('11')) t(x)

statement error cannot AND bit strings of different sizes
SELECT bit_and(x) FROM (VALUES ('100'::bit(3)), ('101010111'::varbit)) t(x)

statement error cannot AND bit strings of different sizes
SELECT bit_and(x) FROM (VALUES (''::varbit), ('1'::varbit)) t(x)

statement error cannot OR bit strings of different sizes
SELECT bit_or(x::varbit) FROM (VALUES ('1'), ('11')) t(x)

statement error cannot OR bit strings of different sizes
SELECT bit_or(x) FROM (VALUES ('100'::bit(3)), ('101010111'::varbit)) t(x)

statement error cannot OR bit strings of different sizes
SELECT bit_or(x) FROM (VALUES (''::varbit), ('1'::varbit)) t(x)

# Regression test for #46981 (not propagating an error which occurs when
# rendering the single output row of countRows aggregate).
statement ok
CREATE TABLE t46981_0(c0 INT);
CREATE VIEW v46981_0(c0) AS SELECT count_rows() FROM t46981_0

statement error parsing regexp: missing argument to repetition operator: `\+`
SELECT * FROM v46981_0 WHERE '' !~ '+'

# Testing ordered-set aggregations.
subtest ordered_set_aggregates

statement ok
DROP TABLE IF EXISTS osagg

statement ok
CREATE TABLE osagg (
  f FLOAT,
  s STRING,
  i INTERVAL
)

statement ok
INSERT INTO osagg VALUES
(NULL, NULL, NULL),
(0.00, NULL, '1 months'),
(0.05, NULL, '1 months'),
(1.0, 'v1', '1 year 1 months'),
(3.0, 'v3', '1 year 3 months'),
(5.0, 'v5', '1 year 5 months'),
(2.0, 'v2', '1 year 2 months'),
(4.0, 'v4', '1 year 4 months'),
(6.0, 'v6', '1 year 6 months')

# Test basic functionality.
query R
SELECT
  percentile_disc(0.95) WITHIN GROUP (ORDER BY f)
FROM osagg
----
6

query RT
SELECT
  percentile_disc(0.95) WITHIN GROUP (ORDER BY f),
  percentile_disc(0.95) WITHIN GROUP (ORDER BY s)
FROM osagg
----
6  v6

query RRT
SELECT
  percentile_cont(0.95) WITHIN GROUP (ORDER BY f),
  percentile_cont(0.95) WITHIN GROUP (ORDER BY f DESC),
  percentile_cont(0.95) WITHIN GROUP (ORDER BY i)
FROM osagg
----
5.6499999999999995  0.017500000000000026  1 year 5 mons 24 days 18:00:00

# Test with null values.
query TRR
SELECT
  percentile_disc(0.00) WITHIN GROUP (ORDER BY s),
  percentile_disc(0.1) WITHIN GROUP (ORDER BY f),
  percentile_disc(0.15) WITHIN GROUP (ORDER BY f)
FROM osagg
----
v1  0  0.05

query RRTT
SELECT
  percentile_cont(0.05) WITHIN GROUP (ORDER BY f),
  percentile_cont(0.05) WITHIN GROUP (ORDER BY f DESC),
  percentile_cont(0.05) WITHIN GROUP (ORDER BY i),
  percentile_cont(0.05) WITHIN GROUP (ORDER BY i DESC)
FROM osagg
----
0.017500000000000005  5.65  1 mon  1 year 5 mons 24 days 18:00:00

# Test with different percent values.
query RRR
SELECT
  percentile_disc(0.25) WITHIN GROUP (ORDER BY f),
  percentile_disc(0.5) WITHIN GROUP (ORDER BY f),
  percentile_disc(0.75) WITHIN GROUP (ORDER BY f)
FROM osagg
----
0.05  2  4

query RRR
SELECT
  percentile_cont(0.25) WITHIN GROUP (ORDER BY f),
  percentile_cont(0.5) WITHIN GROUP (ORDER BY f),
  percentile_cont(0.75) WITHIN GROUP (ORDER BY f)
FROM osagg
----
0.7625  2.5  4.25

# Test with arrays.
query T
SELECT
  percentile_disc(ARRAY[0.25]::float[]) WITHIN GROUP (ORDER BY f)
FROM osagg
----
{0.05}

query T
SELECT
  percentile_disc(ARRAY[0.25, 0.5, 0.75]::float[]) WITHIN GROUP (ORDER BY f)
FROM osagg
----
{0.05,2,4}

query T
SELECT
  percentile_cont(ARRAY[0.25, 0.5, 0.75]::float[]) WITHIN GROUP (ORDER BY f)
FROM osagg
----
{0.7625,2.5,4.25}

query T
SELECT
  percentile_disc(ARRAY[0.25, 0.5, 0.75]::float[]) WITHIN GROUP (ORDER BY i)
FROM osagg
----
{"1 mon","1 year 2 mons","1 year 4 mons"}

statement error percentile value 1.250000 is not between 0 and 1
SELECT
  percentile_disc(ARRAY[1.25]::float[]) WITHIN GROUP (ORDER BY f)
FROM osagg

statement error percentile value 1.250000 is not between 0 and 1
SELECT
  percentile_disc(ARRAY[0.25, 0.50, 1.25]::float[]) WITHIN GROUP (ORDER BY f)
FROM osagg

query T
SELECT
  percentile_cont(ARRAY[0.25, 0.5, 0.75]::float[]) WITHIN GROUP (ORDER BY i)
FROM osagg
----
{"10 mons 3 days 22:30:00","1 year 2 mons 20 days 06:00:00","1 year 4 mons 12 days 18:00:00"}

# Test that the view query is not broken by the overriding logic in the optbuilder.
statement ok
CREATE VIEW osagg_view (disc, cont) AS
  SELECT percentile_disc(0.50) WITHIN GROUP (ORDER BY f),
         percentile_cont(0.50) WITHIN GROUP (ORDER BY f DESC) FROM osagg

query TT
SHOW CREATE osagg_view
----
osagg_view  CREATE VIEW public.osagg_view (
              disc,
              cont
            ) AS SELECT
                percentile_disc(0.50)WITHIN GROUP (ORDER BY f),
                percentile_cont(0.50)WITHIN GROUP (ORDER BY f DESC)
              FROM
                test.public.osagg

# Test malformed ordered-set aggregation.
statement error ordered-set aggregations must have a WITHIN GROUP clause containing one ORDER BY column
SELECT percentile_disc(0.50) FROM osagg

statement error ordered-set aggregations must have a WITHIN GROUP clause containing one ORDER BY column
SELECT percentile_cont(0.50) FROM osagg

# Tests for min/max on collated strings.
statement ok
CREATE TABLE t_collate (x STRING COLLATE en_us);
INSERT INTO t_collate VALUES ('hi' COLLATE en_us), ('hello' COLLATE en_us), ('howdy' COLLATE en_us)

query TT
SELECT min(x), max(x) FROM t_collate
----
hello howdy

query TT
SELECT min(NULL::STRING COLLATE en_us), max(NULL::STRING COLLATE en_us)
----
NULL NULL

subtest json_object_agg

statement ok
CREATE TABLE profiles (
  userid int not null,
  property string not null,
  value string not null
)

statement ok
INSERT INTO profiles VALUES
(1, 'email', 'user1@gmail.com'),
(1, 'phone', '111111111111111'),
(1, 'home_page', 'user1.org1.com'),
(2, 'email', 'user2@gmail.com'),
(2, 'phone', '222222222222222'),
(2, 'home_page', 'user2.org1.com')

query T
SELECT json_object_agg(property, value) FROM profiles GROUP BY userid ORDER BY userid
----
{"email": "user1@gmail.com", "home_page": "user1.org1.com", "phone": "111111111111111"}
{"email": "user2@gmail.com", "home_page": "user2.org1.com", "phone": "222222222222222"}

statement ok
CREATE TABLE users (
  userid int primary key,
  user_name string not null
)

statement ok
CREATE TABLE user_networks(
  userid int not null references users(userid),
  network_name string not null,
  user_account_id string not null
)

statement ok
INSERT INTO users VALUES
(1, 'Alice'),
(2, 'Bob')

statement ok
INSERT INTO user_networks VALUES
('1', 'Facebook', 'Alice_fb'),
('1', 'Twitter', '@Alice'),
('1', 'Instagram', 'AliceInst'),
('2', 'Facebook', 'Bob_fb'),
('2', 'LinkedIn', 'Bob The Builder')

# Nested json_object_agg: the inner query builds one object of network
# accounts per user, and the outer query keys that object by the user's name.
query T
SELECT json_object_agg(user_name, networks)
FROM (
  SELECT
    u.userid AS userid,
    u.user_name,
    json_object_agg(un.network_name, un.user_account_id) AS networks
  FROM users AS u, user_networks AS un
  WHERE u.userid = un.userid
  GROUP BY u.userid
  ORDER BY u.userid
)
GROUP BY userid
ORDER BY userid
----
{"Alice": {"Facebook": "Alice_fb", "Instagram": "AliceInst", "Twitter": "@Alice"}}
{"Bob": {"Facebook": "Bob_fb", "LinkedIn": "Bob The Builder"}}

statement error pgcode 22004 null value not allowed for object key
SELECT json_object_agg(null, null)

statement error pgcode 22004 null value not allowed for object key
SELECT json_object_agg(null, 1)

statement ok
CREATE TABLE persons(
  id string PRIMARY KEY,
  name string NOT NULL
)

statement ok
CREATE TABLE companies(
  id string PRIMARY KEY,
  company_name string NOT NULL
)

statement ok
CREATE TABLE jobs(
  id string PRIMARY KEY,
  person_id string NOT NULL REFERENCES persons(id),
  company_id string NOT NULL REFERENCES companies(id),
  job_title string NOT NULL
)

statement ok
INSERT INTO persons VALUES
('1', 'Alice'),
('2', 'Bob')

statement ok
INSERT INTO companies VALUES
('1', 'Facebook'),
('2', 'Google'),
('3', 'Twitter'),
('4', 'IBM'),
('5', 'Cockroach Labs')

statement ok
INSERT INTO jobs VALUES
('1', '1', '1', 'Developer'),
('2', '1', '2', 'Full Stack'),
('3', '1', '4', 'Research'),
('4', '2', '3', 'Frontend'),
('5', '2', '5', 'DB Developer'),
('6', '2', '2', 'DevOps')

query T
SELECT json_build_object(p.name, json_object_agg(c.company_name, j.job_title))
FROM persons p
  LEFT OUTER JOIN jobs j ON p.id = j.person_id
    LEFT OUTER JOIN companies c ON c.id = j.company_id
GROUP BY p.name
ORDER BY p.name
----
{"Alice": {"Facebook": "Developer", "Google": "Full Stack", "IBM": "Research"}}
{"Bob": {"Cockroach Labs": "DB Developer", "Google": "DevOps", "Twitter": "Frontend"}}

statement ok
CREATE TABLE blog(
  id string NOT NULL,
  name string NOT NULL
)

statement ok
INSERT INTO blog VALUES ('1', 'Test Blog')

statement ok
CREATE TABLE blog_properties(
  blog_id string NOT NULL,
  property_name string NOT NULL,
  property_value string NOT NULL
)

statement ok
INSERT INTO blog_properties VALUES
('1', 'Application Name', 'Instagram'),
('1', 'Admin Email', 'admin@email.com'),
('1', 'Blog Name', 'Wordpress Blog'),
('1', 'Application Name', 'Twitter'),
('1', 'KeepAlive', 'true'),
('1', 'Session Timeout', '1000ms')

# The fixture contains the key 'Application Name' twice ('Instagram' and
# 'Twitter'). With the aggregate ordered by property_value, the expected
# object keeps 'Twitter', the value that sorts last.
query T
SELECT json_build_object(b.name, json_object_agg(p.property_name, p.property_value ORDER BY p.property_value))
FROM blog b, blog_properties p WHERE b.id = p.blog_id
GROUP BY b.name
----
{"Test Blog": {"Admin Email": "admin@email.com", "Application Name": "Twitter", "Blog Name": "Wordpress Blog", "KeepAlive": "true", "Session Timeout": "1000ms"}}

# Regression test for incorrectly handling DISTINCT ordered aggregation in the
# vectorized engine (#55776).
statement ok
CREATE TABLE t55776 (i INT8 PRIMARY KEY, y FLOAT8, x FLOAT8);
INSERT INTO t55776 (i, y, x) VALUES
  (1, 1.0, 1),
  (2, 2.0, 2),
  (3, 1.0, 2),
  (4, 1.0, 2),
  (5, 3.0, 2);

query FI
SELECT corr(DISTINCT y, x), count(DISTINCT y) FROM t55776
----
0.522232967867094 3

# Regression test for the vectorized hash aggregator not maintaining the
# required output ordering when spilling to disk (#63159).
statement ok
CREATE TABLE t63159 (a INT, b INT, INDEX(a) STORING (b));
INSERT INTO t63159 VALUES (1,1), (3,3), (2,2), (5,5), (0,0), (1,1);

query III
SELECT a, b, count(*) FROM t63159 GROUP BY a,b ORDER BY a
----
0  0  1
1  1  2
2  2  1
3  3  1
5  5  1

# Regression test for the vectorized hash aggregator using incorrect type schema
# when planning an external sort to maintain the required ordering.
statement ok
CREATE TABLE t63436 (a INT, b FLOAT, c DECIMAL, INDEX(a));
SELECT count(*) FROM t63436@t63436_a_idx GROUP BY b, c ORDER BY c;

# Regression test for #64319. Percentiles of constants should not panic.
# NOTE(review): the subtest label below is 63436, the number used by the
# preceding regression test's table (t63436), while this comment cites #64319 —
# confirm whether the label should be 64319.
subtest 63436

# The ORDER BY expression is an integer constant; the expected result is that
# constant.
query I
SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY 33) FROM osagg
----
33

query R
SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY 33.0) FROM osagg
----
33.0

query I
SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY 33::INT) FROM osagg
----
33

query I
SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY '33'::INT) FROM osagg
----
33

# Note: In this case Postgres returns "ERROR: 42804: could not determine
# polymorphic type because input has type unknown". However, Postgres does allow
# ... (ORDER BY s) ... where s is a TEXT column. It would require additional
# complexity for us to error in this case, so we return a result instead.
query T
SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY 'foo') FROM osagg
----
foo

query T
SELECT percentile_disc(0.95) WITHIN GROUP (ORDER BY current_database()) FROM osagg
----
test

subtest corrupt_combine

# Fixture for the corrupt_combine subtest: six (y, x) float pairs.
statement ok
CREATE TABLE corrupt_combine (
  y float,
  x float
)

statement ok
INSERT INTO corrupt_combine (y, x) VALUES
  (1.0, 10.0),
  (2.0, 25.0),
  (3.0, 35.0),
  (4.0, 50.0),
  (5.0, 70.0),
  (6.0, 70.0)

# PR #73062 introduced a bug that caused the
# finalRegressionAccumulatorDecimalBase.combine function to corrupt values in
# regressionAccumulatorDecimalBase that are used across iterations (n, sx, sxx,
# sy, syy, sxy). Depending on the order of two local accumulators, the result
# from the second accumulator could be directly assigned to the mentioned fields
# when "this.n == 0". In this case two or more functions in the bucket shared
# the same values and repeated the calculation
# (see aggregator.accumulateRowIntoBucket).
# This test checks that multiple aggregate functions in the same bucket preserve
# their values across multiple "combine" calls.

# Four regression aggregates are evaluated together over the same rows.
query FFFF
SELECT
  covar_pop(y, x),
  covar_samp(y, x),
  regr_sxx(y, x),
  regr_syy(y, x)
FROM corrupt_combine
----
37.5 45 2983.333333333333 17.5

# Regression test for #88993 where a limit pushed down into a union of scans
# caused incorrect query results.
statement ok
CREATE TABLE t2 (
  a INT,
  b INT,
  c INT,
  INDEX (b, c, a)
);
INSERT INTO t2 (a, b, c) VALUES (1, 10, 20), (0, 11, 100);

query I
SELECT min(a) FROM t2 WHERE (b <= 11 AND c < 50) OR (b = 11 AND c = 50) OR (b >= 11 AND c > 50)
----
0

# Regression test for incorrectly ignoring NULLS LAST in aggregate functions (#91295).
statement ok
CREATE TABLE nulls_last_test (
    id INT NULL,
    k INT NULL,
    v VARCHAR(3) NULL
);
INSERT INTO nulls_last_test VALUES
  (1, 1, 'foo'),
  (2, null, null),
  (null, null, 'bar'),
  (3, 3, 'baz');

query T
SELECT array_agg(id ORDER BY id NULLS LAST) FROM nulls_last_test
----
{1,2,3,NULL}

# It should work with tuples too.
query T
SELECT array_agg((k, v) ORDER BY (k, v)) FROM nulls_last_test
----
{"(,)","(,bar)","(1,foo)","(3,baz)"}

query T
SELECT array_agg((k, v) ORDER BY (k, v) NULLS LAST) FROM nulls_last_test
----
{"(1,foo)","(3,baz)","(,bar)","(,)"}

# Tuples with projections also work.
query T
SELECT array_agg((k, v) ORDER BY (k+1, v||'foo')) FROM nulls_last_test;
----
{"(,)","(,bar)","(1,foo)","(3,baz)"}

query T
SELECT array_agg((k, v) ORDER BY (k+1, v||'foo') NULLS LAST) FROM nulls_last_test;
----
{"(1,foo)","(3,baz)","(,bar)","(,)"}

# Using the session variable, we should get results that match Postgres.
statement ok
SET null_ordered_last = true

query T
SELECT array_agg(id ORDER BY id) FROM nulls_last_test
----
{1,2,3,NULL}

query T
SELECT array_agg((k, v) ORDER BY (k, v)) FROM nulls_last_test
----
{"(1,foo)","(3,baz)","(,bar)","(,)"}

# TODO(#93558): This does not match Postgres.
# Postgres returns:
#   {"(1,foo)","(3,baz)","(,bar)","(,)"}
query T
SELECT array_agg((k, v) ORDER BY (k, v) NULLS FIRST) FROM nulls_last_test
----
{"(,)","(,bar)","(1,foo)","(3,baz)"}

query T
SELECT array_agg((k, v) ORDER BY (k+1, v||'foo')) FROM nulls_last_test;
----
{"(1,foo)","(3,baz)","(,bar)","(,)"}

# TODO(#93558): This does not match Postgres.
# Postgres returns:
#   {"(1,foo)","(3,baz)","(,bar)","(,)"}
query T
SELECT array_agg((k, v) ORDER BY (k+1, v||'foo') NULLS FIRST) FROM nulls_last_test;
----
{"(,)","(,bar)","(1,foo)","(3,baz)"}

# TODO(#93558): This test case is broken and shows the limit of our
# optimizer-based approach for NULLS LAST.
# Postgres returns:
#   {"(1,1)","(1,)","(,)",NULL}
query T
WITH t (x, y) AS (
  VALUES
    ((1, 1), 1),
    ((NULL::RECORD), 2),
    ((1, NULL::INT), 3),
    ((NULL::INT, NULL::INT), 4)
)
SELECT array_agg(x ORDER BY x)
FROM t;
----
{"(1,)","(1,1)",NULL,"(,)"}

# TODO(#93558): This test case is broken and shows the limit of our
# optimizer-based approach for NULLS LAST.
# Postgres returns:
#   {NULL,"(1,1)","(1,)","(,)"}
query T
WITH t (x, y) AS (
  VALUES
    ((1, 1), 1),
    ((NULL::RECORD), 2),
    ((1, NULL::INT), 3),
    ((NULL::INT, NULL::INT), 4)
)
SELECT array_agg(x ORDER BY x NULLS FIRST)
FROM t;
----
{NULL,"(,)","(1,)","(1,1)"}

statement ok
RESET null_ordered_last

# Regression test for #109629. Implicit casts should be added during
# type-checking when necessary.
query T
SELECT array_cat_agg(ARRAY[(1::INT,), (1::FLOAT8,)]);
----
{(1),(1)}

query T
SELECT array_cat_agg(
  ARRAY[(416644234484367676:::INT8,),(NULL,),((-0.12116245180368423):::FLOAT8,)]
)
----
{(4.166442344843677e+17),(),(-0.12116245180368423)}

statement ok
CREATE TABLE __test_array_agg(a TEXT PRIMARY KEY, b TEXT, c TEXT);
INSERT INTO __test_array_agg VALUES ('a', 'b', 'c'), ('aa', 'bb', 'cc'), ('aaa', 'bbb', 'ccc');

query T
SELECT array_agg(array[a, b, c]) FROM __test_array_agg;
----
{"{a,b,c}","{aa,bb,cc}","{aaa,bbb,ccc}"}

# array_agg with multi-dimensional arrays as inputs is unsupported (although
# postgres supports them).
query error unknown signature: array_agg\(int\[\]\[\]\)
WITH
    foo(f) AS (SELECT array_agg(x) FROM generate_series(1, 3) g(x)),
    bar(b) AS (SELECT array_agg(f) FROM foo, generate_series(1, 3)),
    baz(z) AS (SELECT array_agg(b) FROM bar, generate_series(1, 3))
SELECT z FROM baz;

# Regression test for incorrectly picking row-by-row ordered aggregator when
# some optimizer rules are disabled (#124101).
statement ok
CREATE TABLE t124101 (
  a INT,
  b INT,
  c INT,
  PRIMARY KEY (a, b),
  UNIQUE (a, c)
);
INSERT INTO t124101 VALUES (0, 0, 1), (0, 1, 2);
SET testing_optimizer_disable_rule_probability = 1.000000;

query II
SELECT a, sum_int(c) AS s FROM t124101 GROUP BY a, c ORDER BY a, a, s, c LIMIT 2;
----
0  1
0  2

statement ok
RESET testing_optimizer_disable_rule_probability;

# Regression test for erroring out on FLOAT4 type and percentile_cont (#90519).
statement ok
CREATE TABLE t90519 (i int);
INSERT INTO t90519 VALUES (1),(2),(3),(4);

# The percentile argument is an array, so the result is an array as well; the
# column is declared as T like the other array-valued percentile queries.
query T
SELECT percentile_cont(ARRAY[.4::FLOAT]) WITHIN GROUP (ORDER BY i::FLOAT4) FROM t90519;
----
{2.2}
