# LogicTest: local

# Schema used by the aggregation plan tests below: kv has an INT primary key
# k, INT columns v and w, a STRING column s, and a STRING array column arr.
statement ok
CREATE TABLE kv (
  k   INT PRIMARY KEY,
  v   INT,
  w   INT,
  s   STRING,
  arr STRING[]
)

# Every aggregate is applied to constant arguments while scanning kv. The
# expected plan renders each distinct constant once and feeds the rendered
# columns to a single scalar group; regr_count(1, 1) is shown as count_rows().
query T
EXPLAIN (TYPES) SELECT min(1), max(1), count(NULL), sum_int(1), avg(1), sum(1), stddev(1), variance(1),
bool_and(true), bool_or(false), xor_agg(b'\x01'), corr(1, 1), covar_pop(1, 1), covar_samp(1, 1), sqrdiff(1),
regr_intercept(1, 1), regr_r2(1, 1), regr_slope(1, 1), regr_sxx(1, 1), regr_sxy(1, 1), regr_syy(1, 1),
regr_count(1, 1), regr_avgx(1, 1), regr_avgy(1, 1)
FROM kv
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (min int, max int, count int, sum_int int, avg decimal, sum decimal, stddev decimal, variance decimal, bool_and bool, bool_or bool, xor_agg bytes, corr float, covar_pop float, covar_samp float, sqrdiff decimal, regr_intercept float, regr_r2 float, regr_slope float, regr_sxx float, regr_sxy float, regr_syy float, regr_count int, regr_avgx float, regr_avgy float)
│ estimated row count: 1 (missing stats)
│ aggregate 0: min(column10)
│ aggregate 1: max(column10)
│ aggregate 2: count(column13)
│ aggregate 3: sum_int(column10)
│ aggregate 4: avg(column10)
│ aggregate 5: sum(column10)
│ aggregate 6: stddev(column10)
│ aggregate 7: variance(column10)
│ aggregate 8: bool_and(column20)
│ aggregate 9: bool_or(column22)
│ aggregate 10: xor_agg(column24)
│ aggregate 11: corr(column10, column10)
│ aggregate 12: covar_pop(column10, column10)
│ aggregate 13: covar_samp(column10, column10)
│ aggregate 14: sqrdiff(column10)
│ aggregate 15: regr_intercept(column10, column10)
│ aggregate 16: regr_r2(column10, column10)
│ aggregate 17: regr_slope(column10, column10)
│ aggregate 18: regr_sxx(column10, column10)
│ aggregate 19: regr_sxy(column10, column10)
│ aggregate 20: regr_syy(column10, column10)
│ aggregate 21: count_rows()
│ aggregate 22: regr_avgx(column10, column10)
│ aggregate 23: regr_avgy(column10, column10)
│
└── • render
    │ columns: (column10 int, column13 unknown, column20 bool, column22 bool, column24 bytes)
    │ render column10: (1)[int]
    │ render column13: (NULL)[unknown]
    │ render column20: (true)[bool]
    │ render column22: (false)[bool]
    │ render column24: ('\x01')[bytes]
    │
    └── • scan
          columns: ()
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# The same aggregate list, now mostly over table columns (sum_int and the
# regr_* functions still take constants). In the expected plan bool_and and
# bool_or share one rendered column for (v = 1), and the scan reads only k, v
# and s.
query T
EXPLAIN (TYPES) SELECT min(v), max(v), count(v), sum_int(1), avg(v), sum(v), stddev(v), variance(v),
bool_and(v = 1), bool_or(v = 1), xor_agg(s::bytes), corr(v, k), covar_pop(v, k), covar_samp(v, k), sqrdiff(v),
regr_intercept(1, 1), regr_r2(1, 1), regr_slope(1, 1), regr_sxx(1, 1), regr_sxy(1, 1), regr_syy(1, 1),
regr_count(1, 1), regr_avgx(1, 1), regr_avgy(1, 1)
FROM kv
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (min int, max int, count int, sum_int int, avg decimal, sum decimal, stddev decimal, variance decimal, bool_and bool, bool_or bool, xor_agg bytes, corr float, covar_pop float, covar_samp float, sqrdiff decimal, regr_intercept float, regr_r2 float, regr_slope float, regr_sxx float, regr_sxy float, regr_syy float, regr_count int, regr_avgx float, regr_avgy float)
│ estimated row count: 1 (missing stats)
│ aggregate 0: min(v)
│ aggregate 1: max(v)
│ aggregate 2: count(v)
│ aggregate 3: sum_int(column13)
│ aggregate 4: avg(v)
│ aggregate 5: sum(v)
│ aggregate 6: stddev(v)
│ aggregate 7: variance(v)
│ aggregate 8: bool_and(column19)
│ aggregate 9: bool_or(column19)
│ aggregate 10: xor_agg(column22)
│ aggregate 11: corr(v, k)
│ aggregate 12: covar_pop(v, k)
│ aggregate 13: covar_samp(v, k)
│ aggregate 14: sqrdiff(v)
│ aggregate 15: regr_intercept(column13, column13)
│ aggregate 16: regr_r2(column13, column13)
│ aggregate 17: regr_slope(column13, column13)
│ aggregate 18: regr_sxx(column13, column13)
│ aggregate 19: regr_sxy(column13, column13)
│ aggregate 20: regr_syy(column13, column13)
│ aggregate 21: count_rows()
│ aggregate 22: regr_avgx(column13, column13)
│ aggregate 23: regr_avgy(column13, column13)
│
└── • render
    │ columns: (column13 int, column19 bool, column22 bytes, k int, v int)
    │ render column13: (1)[int]
    │ render column19: ((v)[int] = (1)[int])[bool]
    │ render column22: ((s)[string]::BYTES)[bytes]
    │ render k: (k)[int]
    │ render v: (v)[int]
    │
    └── • scan
          columns: (k int, v int, s string)
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# Aggregate functions trigger aggregation and computation even when there is
# no FROM clause: the expected plan is a scalar group over a single-row values
# node, with a render on top for the avg cast and the to_hex call.
query T
EXPLAIN (TYPES) SELECT min(1), count(NULL), max(1), sum_int(1), avg(1)::float, sum(1), stddev(1), variance(1),
bool_and(true), bool_or(true), to_hex(xor_agg(b'\x01')), corr(1, 1), covar_pop(1, 1), covar_samp(1, 1), sqrdiff(1),
regr_intercept(1, 1), regr_r2(1, 1), regr_slope(1, 1), regr_sxx(1, 1), regr_sxy(1, 1), regr_syy(1, 1),
regr_count(1, 1), regr_avgx(1, 1), regr_avgy(1, 1)
----
distribution: local
vectorized: true
·
• render
│ columns: (min int, count int, max int, sum_int int, avg float, sum decimal, stddev decimal, variance decimal, bool_and bool, bool_or bool, to_hex string, corr float, covar_pop float, covar_samp float, sqrdiff decimal, regr_intercept float, regr_r2 float, regr_slope float, regr_sxx float, regr_sxy float, regr_syy float, regr_count int, regr_avgx float, regr_avgy float)
│ render avg: ((avg)[decimal]::FLOAT8)[float]
│ render to_hex: (to_hex((xor_agg)[bytes]))[string]
│ render min: (min)[int]
│ render count: (count)[int]
│ render max: (max)[int]
│ render sum_int: (sum_int)[int]
│ render sum: (sum)[decimal]
│ render stddev: (stddev)[decimal]
│ render variance: (variance)[decimal]
│ render bool_and: (bool_and)[bool]
│ render bool_or: (bool_or)[bool]
│ render corr: (corr)[float]
│ render covar_pop: (covar_pop)[float]
│ render covar_samp: (covar_samp)[float]
│ render sqrdiff: (sqrdiff)[decimal]
│ render regr_intercept: (regr_intercept)[float]
│ render regr_r2: (regr_r2)[float]
│ render regr_slope: (regr_slope)[float]
│ render regr_sxx: (regr_sxx)[float]
│ render regr_sxy: (regr_sxy)[float]
│ render regr_syy: (regr_syy)[float]
│ render regr_count: (count_rows)[int]
│ render regr_avgx: (regr_avgx)[float]
│ render regr_avgy: (regr_avgy)[float]
│
└── • group (scalar)
    │ columns: (min int, count int, max int, sum_int int, avg decimal, sum decimal, stddev decimal, variance decimal, bool_and bool, bool_or bool, xor_agg bytes, corr float, covar_pop float, covar_samp float, sqrdiff decimal, regr_intercept float, regr_r2 float, regr_slope float, regr_sxx float, regr_sxy float, regr_syy float, count_rows int, regr_avgx float, regr_avgy float)
    │ estimated row count: 1
    │ aggregate 0: min(column1)
    │ aggregate 1: count(column3)
    │ aggregate 2: max(column1)
    │ aggregate 3: sum_int(column1)
    │ aggregate 4: avg(column1)
    │ aggregate 5: sum(column1)
    │ aggregate 6: stddev(column1)
    │ aggregate 7: variance(column1)
    │ aggregate 8: bool_and(column11)
    │ aggregate 9: bool_or(column11)
    │ aggregate 10: xor_agg(column14)
    │ aggregate 11: corr(column1, column1)
    │ aggregate 12: covar_pop(column1, column1)
    │ aggregate 13: covar_samp(column1, column1)
    │ aggregate 14: sqrdiff(column1)
    │ aggregate 15: regr_intercept(column1, column1)
    │ aggregate 16: regr_r2(column1, column1)
    │ aggregate 17: regr_slope(column1, column1)
    │ aggregate 18: regr_sxx(column1, column1)
    │ aggregate 19: regr_sxy(column1, column1)
    │ aggregate 20: regr_syy(column1, column1)
    │ aggregate 21: count_rows()
    │ aggregate 22: regr_avgx(column1, column1)
    │ aggregate 23: regr_avgy(column1, column1)
    │
    └── • values
          columns: (column1 int, column3 unknown, column11 bool, column14 bytes)
          size: 4 columns, 1 row
          row 0, expr 0: (1)[int]
          row 0, expr 1: (NULL)[unknown]
          row 0, expr 2: (true)[bool]
          row 0, expr 3: ('\x01')[bytes]

# GROUP BY 2 refers to the second select item, k. The expected plan is a
# streaming group-by on k over a scan that provides the ordering +k.
query T
EXPLAIN (TYPES) SELECT count(*), k FROM kv GROUP BY 2
----
distribution: local
vectorized: true
·
• group (streaming)
│ columns: (count int, k int)
│ estimated row count: 1,000 (missing stats)
│ aggregate 0: count_rows()
│ group by: k
│ ordered: +k
│
└── • scan
      columns: (k int)
      ordering: +k
      estimated row count: 1,000 (missing stats)
      table: kv@kv_pkey
      spans: FULL SCAN

# Selecting and grouping on a more complex expression works. The expected plan
# renders k+v once (as column11) and hash-groups on that rendered column.
query T
EXPLAIN (TYPES) SELECT count(*), k+v AS r FROM kv GROUP BY k+v
----
distribution: local
vectorized: true
·
• group (hash)
│ columns: (count int, r int)
│ estimated row count: 1,000 (missing stats)
│ aggregate 0: count_rows()
│ group by: column11
│
└── • render
    │ columns: (column11 int)
    │ render column11: ((k)[int] + (v)[int])[int]
    │
    └── • scan
          columns: (k int, v int)
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# Selecting a more complex expression, made up of things which are each
# grouped, works. The expected plan groups on k only, carries v through
# any_not_null, and computes k+v in a render above the group.
query T
EXPLAIN (TYPES) SELECT count(*), k+v AS r FROM kv GROUP BY k, v
----
distribution: local
vectorized: true
·
• render
│ columns: (count int, r int)
│ render r: ((k)[int] + (any_not_null)[int])[int]
│ render count_rows: (count_rows)[int]
│
└── • group (streaming)
    │ columns: (k int, count_rows int, any_not_null int)
    │ estimated row count: 1,000 (missing stats)
    │ aggregate 0: count_rows()
    │ aggregate 1: any_not_null(v)
    │ group by: k
    │ ordered: +k
    │
    └── • scan
          columns: (k int, v int)
          ordering: +k
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# count(k) over the primary-key column is expected to be planned as
# count_rows(), with a scan that reads no columns.
query T
EXPLAIN (TYPES) SELECT count(k) FROM kv
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (count int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: count_rows()
│
└── • scan
      columns: ()
      estimated row count: 1,000 (missing stats)
      table: kv@kv_pkey
      spans: FULL SCAN

# With sum(k) and max(k) alongside, count(k) is still expected to be planned
# as count_rows(), but the scan now has to produce k.
query T
EXPLAIN (TYPES) SELECT count(k), sum(k), max(k) FROM kv
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (count int, sum decimal, max int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: count_rows()
│ aggregate 1: sum(k)
│ aggregate 2: max(k)
│
└── • scan
      columns: (k int)
      estimated row count: 1,000 (missing stats)
      table: kv@kv_pkey
      spans: FULL SCAN

# DISTINCT and non-DISTINCT forms of count, sum and min over v in one scalar
# group. In the expected plan DISTINCT is kept for count and sum, while
# min(DISTINCT v) is shown as plain min(v).
query T
EXPLAIN (VERBOSE) SELECT count(v), count(DISTINCT v), sum(v), sum(DISTINCT v), min(v), min(DISTINCT v) FROM kv
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (count, count, sum, sum, min, min)
│ estimated row count: 1 (missing stats)
│ aggregate 0: count(v)
│ aggregate 1: count(DISTINCT v)
│ aggregate 2: sum(v)
│ aggregate 3: sum(DISTINCT v)
│ aggregate 4: min(v)
│ aggregate 5: min(v)
│
└── • scan
      columns: (v)
      estimated row count: 1,000 (missing stats)
      table: kv@kv_pkey
      spans: FULL SCAN

# count(DISTINCT a.*) over a cross join. The expected plan renders the five
# columns as a single tuple (column19), applies a distinct on it, and then
# counts; one input of the cross join is scanned with no columns.
query T
EXPLAIN (VERBOSE) SELECT count(DISTINCT a.*) FROM kv a, kv b
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (count)
│ estimated row count: 1 (missing stats)
│ aggregate 0: count(column19)
│
└── • distinct
    │ columns: (column19)
    │ estimated row count: 1,000 (missing stats)
    │ distinct on: column19
    │
    └── • render
        │ columns: (column19)
        │ render column19: ((k, v, w, s, arr) AS k, v, w, s, arr)
        │
        └── • cross join (inner)
            │ columns: (k, v, w, s, arr)
            │ estimated row count: 1,000,000 (missing stats)
            │
            ├── • scan
            │     columns: (k, v, w, s, arr)
            │     estimated row count: 1,000 (missing stats)
            │     table: kv@kv_pkey
            │     spans: FULL SCAN
            │
            └── • scan
                  columns: ()
                  estimated row count: 1,000 (missing stats)
                  table: kv@kv_pkey
                  spans: FULL SCAN

# GROUP BY a.* over a cross join. The expected plan hash-groups on the single
# column k and only scans k from each input.
query T
EXPLAIN (VERBOSE) SELECT min(b.k) FROM kv a, kv b GROUP BY a.*
----
distribution: local
vectorized: true
·
• project
│ columns: (min)
│
└── • group (hash)
    │ columns: (k, min)
    │ estimated row count: 1,000 (missing stats)
    │ aggregate 0: min(k)
    │ group by: k
    │
    └── • cross join (inner)
        │ columns: (k, k)
        │ estimated row count: 1,000,000 (missing stats)
        │
        ├── • scan
        │     columns: (k)
        │     estimated row count: 1,000 (missing stats)
        │     table: kv@kv_pkey
        │     spans: FULL SCAN
        │
        └── • scan
              columns: (k)
              estimated row count: 1,000 (missing stats)
              table: kv@kv_pkey
              spans: FULL SCAN

# GROUP BY on a tuple that wraps a constant and (a.*). The expected plan still
# hash-groups on the single column k, with no grouping column for the constant.
query T
EXPLAIN (VERBOSE) SELECT min(b.k) FROM kv a, kv b GROUP BY (1, (a.*))
----
distribution: local
vectorized: true
·
• project
│ columns: (min)
│
└── • group (hash)
    │ columns: (k, min)
    │ estimated row count: 1,000 (missing stats)
    │ aggregate 0: min(k)
    │ group by: k
    │
    └── • cross join (inner)
        │ columns: (k, k)
        │ estimated row count: 1,000,000 (missing stats)
        │
        ├── • scan
        │     columns: (k)
        │     estimated row count: 1,000 (missing stats)
        │     table: kv@kv_pkey
        │     spans: FULL SCAN
        │
        └── • scan
              columns: (k)
              estimated row count: 1,000 (missing stats)
              table: kv@kv_pkey
              spans: FULL SCAN

# A useful optimization: naked tuple expansion in GROUP BY clause.
# GROUP BY (a.*) is expected to hash-group on the single column k.
query T
EXPLAIN (VERBOSE) SELECT min(b.k) FROM kv a, kv b GROUP BY (a.*)
----
distribution: local
vectorized: true
·
• project
│ columns: (min)
│
└── • group (hash)
    │ columns: (k, min)
    │ estimated row count: 1,000 (missing stats)
    │ aggregate 0: min(k)
    │ group by: k
    │
    └── • cross join (inner)
        │ columns: (k, k)
        │ estimated row count: 1,000,000 (missing stats)
        │
        ├── • scan
        │     columns: (k)
        │     estimated row count: 1,000 (missing stats)
        │     table: kv@kv_pkey
        │     spans: FULL SCAN
        │
        └── • scan
              columns: (k)
              estimated row count: 1,000 (missing stats)
              table: kv@kv_pkey
              spans: FULL SCAN

# Show reuse of render expressions inside an expansion. With no aggregate
# functions in the query, the expected plan is a distinct on (v, w, s)
# followed by a projection of v.
query T
EXPLAIN (VERBOSE) SELECT a.v FROM kv a, kv b GROUP BY a.v, a.w, a.s
----
distribution: local
vectorized: true
·
• project
│ columns: (v)
│
└── • distinct
    │ columns: (v, w, s)
    │ estimated row count: 1,000 (missing stats)
    │ distinct on: v, w, s
    │
    └── • cross join (inner)
        │ columns: (v, w, s)
        │ estimated row count: 1,000,000 (missing stats)
        │
        ├── • scan
        │     columns: (v, w, s)
        │     estimated row count: 1,000 (missing stats)
        │     table: kv@kv_pkey
        │     spans: FULL SCAN
        │
        └── • scan
              columns: ()
              estimated row count: 1,000 (missing stats)
              table: kv@kv_pkey
              spans: FULL SCAN

# abc has a CHAR primary key a plus FLOAT, BOOLEAN and DECIMAL columns; the
# tests below take min/max over the CHAR key.
statement ok
CREATE TABLE abc (
  a CHAR PRIMARY KEY,
  b FLOAT,
  c BOOLEAN,
  d DECIMAL
)

# Scalar min over the primary key: the expected plan reads a single row
# (limit: 1) from abc_pkey and aggregates it with any_not_null.
query T
EXPLAIN (TYPES) SELECT min(a) FROM abc
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (min char)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(a)
│
└── • scan
      columns: (a char)
      estimated row count: 1 (missing stats)
      table: abc@abc_pkey
      spans: LIMITED SCAN
      limit: 1

# min and max of the primary key together: the expected plan has two one-row
# subqueries over abc_pkey, one a limit-1 scan and the other a limit-1 revscan.
query T
EXPLAIN SELECT min(a), max(a) FROM abc
----
distribution: local
vectorized: true
·
• root
│
├── • values
│     size: 2 columns, 1 row
│
├── • subquery
│   │ id: @S1
│   │ original sql: <unknown>
│   │ exec mode: one row
│   │
│   └── • group (scalar)
│       │
│       └── • scan
│             missing stats
│             table: abc@abc_pkey
│             spans: LIMITED SCAN
│             limit: 1
│
└── • subquery
    │ id: @S2
    │ original sql: <unknown>
    │ exec mode: one row
    │
    └── • group (scalar)
        │
        └── • revscan
              missing stats
              table: abc@abc_pkey
              spans: LIMITED SCAN
              limit: 1

# xyz has an INT primary key x, secondary indexes xy (x, y) and zyx (z, y, x),
# and stores each of its three columns in a separate column family.
statement ok
CREATE TABLE xyz (
  x INT PRIMARY KEY,
  y INT,
  z FLOAT,
  INDEX xy (x, y),
  INDEX zyx (z, y, x),
  FAMILY (x),
  FAMILY (y),
  FAMILY (z)
)

# Three rows; the last one has a NULL y.
statement ok
INSERT INTO xyz VALUES (1, 2, 3.0), (4, 5, 6.0), (7, NULL, 8.0)

# min(x) with no filter: expected to be a limit-1 forward scan of index xy
# feeding any_not_null.
query T
EXPLAIN (TYPES) SELECT min(x) FROM xyz
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (min int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(x)
│
└── • scan
      columns: (x int)
      estimated row count: 1 (missing stats)
      table: xyz@xy
      spans: LIMITED SCAN
      limit: 1

# min(x) with an IN list on x: expected to be a limit-1 scan of index xy
# constrained to the three listed key spans.
query T
EXPLAIN (TYPES) SELECT min(x) FROM xyz WHERE x in (0, 4, 7)
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (min int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(x)
│
└── • scan
      columns: (x int)
      estimated row count: 1 (missing stats)
      table: xyz@xy
      spans: /0-/1 /4-/5 /7-/8
      limit: 1

# max(x) with no filter: expected to be a limit-1 reverse scan of index xy.
query T
EXPLAIN (TYPES) SELECT max(x) FROM xyz
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (max int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(x)
│
└── • revscan
      columns: (x int)
      estimated row count: 1 (missing stats)
      table: xyz@xy
      spans: LIMITED SCAN
      limit: 1

# min(y) for a single x: the expected scan of index xy uses the span
# /1/!NULL-/2 (x = 1, NULL y excluded) and shows no limit.
query T
EXPLAIN (TYPES) SELECT min(y) FROM xyz WHERE x = 1
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (min int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(y)
│
└── • project
    │ columns: (y int)
    │
    └── • scan
          columns: (x int, y int)
          estimated row count: 1 (missing stats)
          table: xyz@xy
          spans: /1/!NULL-/2

# max(y) for a single x: expected to be a forward scan of index xy over the
# span /1/!NULL-/2, with no limit and no reverse scan.
query T
EXPLAIN (TYPES) SELECT max(y) FROM xyz WHERE x = 1
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (max int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(y)
│
└── • project
    │ columns: (y int)
    │
    └── • scan
          columns: (x int, y int)
          estimated row count: 1 (missing stats)
          table: xyz@xy
          spans: /1/!NULL-/2

# min(y) filtered on z: expected to be a limit-1 forward scan of index zyx
# whose span starts past NULL y values (/7/!NULL).
query T
EXPLAIN (TYPES) SELECT min(y) FROM xyz WHERE z = 7
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (min int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(y)
│
└── • project
    │ columns: (y int)
    │
    └── • scan
          columns: (y int, z float)
          estimated row count: 1 (missing stats)
          table: xyz@zyx
          spans: /7/!NULL-/7.000000000000001
          limit: 1

# max(y) filtered on z: expected to be a limit-1 reverse scan of index zyx
# over the same /7/!NULL span.
query T
EXPLAIN (TYPES) SELECT max(y) FROM xyz WHERE z = 7
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (max int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(y)
│
└── • project
    │ columns: (y int)
    │
    └── • revscan
          columns: (y int, z float)
          estimated row count: 1 (missing stats)
          table: xyz@zyx
          spans: /7/!NULL-/7.000000000000001
          limit: 1

# min(x) with a tuple equality on (y, z): expected to be a limit-1 scan of
# index zyx over the span /3/2-/3/3.
query T
EXPLAIN (TYPES) SELECT min(x) FROM xyz WHERE (y, z) = (2, 3.0)
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (min int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(x)
│
└── • project
    │ columns: (x int)
    │
    └── • scan
          columns: (x int, y int, z float)
          estimated row count: 1 (missing stats)
          table: xyz@zyx
          spans: /3/2-/3/3
          limit: 1

# Run the same min(x) query with KV tracing turned on so that the following
# query can inspect which keys were fetched; tracing is turned off afterwards.
statement ok
SET tracing = on,kv,results; SELECT min(x) FROM xyz WHERE (y, z) = (2, 3.0); SET tracing = off

# From the session trace, list the 'fetched:' messages first and then the
# 'output row' messages, each in trace order. The expected output is a single
# entry fetched from index zyx and one output row.
query T
SELECT message FROM [SHOW KV TRACE FOR SESSION] WITH ORDINALITY
 WHERE message LIKE 'fetched:%' OR message LIKE 'output row%'
 ORDER BY message LIKE 'fetched:%' DESC, ordinality ASC
----
fetched: /xyz/zyx/3.0/2/1 -> <undecoded>
output row: [1]

# max(x) with a tuple equality on (z, y): expected to be a limit-1 reverse
# scan of index zyx over the span /3/2-/3/3.
query T
EXPLAIN (TYPES) SELECT max(x) FROM xyz WHERE (z, y) = (3.0, 2)
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (max int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(x)
│
└── • project
    │ columns: (x int)
    │
    └── • revscan
          columns: (x int, y int, z float)
          estimated row count: 1 (missing stats)
          table: xyz@zyx
          spans: /3/2-/3/3
          limit: 1

# MULTIPLE MIN/MAX

# min and max over different columns with no filter: the expected plan has two
# one-row subqueries, a limit-1 scan of xy and a limit-1 revscan of zyx.
query T
EXPLAIN SELECT min(x), max(z) FROM xyz
----
distribution: local
vectorized: true
·
• root
│
├── • values
│     size: 2 columns, 1 row
│
├── • subquery
│   │ id: @S1
│   │ original sql: <unknown>
│   │ exec mode: one row
│   │
│   └── • group (scalar)
│       │
│       └── • scan
│             missing stats
│             table: xyz@xy
│             spans: LIMITED SCAN
│             limit: 1
│
└── • subquery
    │ id: @S2
    │ original sql: <unknown>
    │ exec mode: one row
    │
    └── • group (scalar)
        │
        └── • revscan
              missing stats
              table: xyz@zyx
              spans: (/NULL - ]
              limit: 1

# The SUM disables ReplaceScalarMinMaxWithScalarSubqueries. The expected plan
# is a single scalar group over a full scan of xyz_pkey.
query T
EXPLAIN SELECT min(x), max(z), sum(x) FROM xyz
----
distribution: local
vectorized: true
·
• group (scalar)
│
└── • scan
      missing stats
      table: xyz@xyz_pkey
      spans: FULL SCAN

# The optimization that converts multiple min/max aggregates into limit-1
# subqueries is not applied to this filtered query: the expected plan is a
# single scalar group over a constrained scan of index xy.
query T
EXPLAIN SELECT min(y), max(y) FROM xyz WHERE x in (0, 4, 7)
----
distribution: local
vectorized: true
·
• group (scalar)
│
└── • scan
      missing stats
      table: xyz@xy
      spans: [/0 - /0] [/4 - /4] [/7 - /7]

# min(x) and max(x) with x pinned to one value: expected to be a single scalar
# group over the span [/1 - /1] of index xy, with no subqueries.
query T
EXPLAIN SELECT min(x), max(x) FROM xyz WHERE x = 1
----
distribution: local
vectorized: true
·
• group (scalar)
│
└── • scan
      missing stats
      table: xyz@xy
      spans: [/1 - /1]

# MULTIPLE MIN/MAX WITH FILTER

# min(z) and max(y) with an IN list on z: the expected plan has two one-row
# subqueries. The first is a limit-1 scan of zyx over the three listed spans;
# the second is a limit 1 over a union of limit-1 revscans, one per listed z
# value.
query T
EXPLAIN SELECT min(z), max(y) FROM xyz WHERE z in (3.0, 6.0, 8.0)
----
distribution: local
vectorized: true
·
• root
│
├── • values
│     size: 2 columns, 1 row
│
├── • subquery
│   │ id: @S1
│   │ original sql: <unknown>
│   │ exec mode: one row
│   │
│   └── • group (scalar)
│       │
│       └── • scan
│             missing stats
│             table: xyz@zyx
│             spans: [/3.0 - /3.0] [/6.0 - /6.0] [/8.0 - /8.0]
│             limit: 1
│
└── • subquery
    │ id: @S2
    │ original sql: <unknown>
    │ exec mode: one row
    │
    └── • group (scalar)
        │
        └── • limit
            │ count: 1
            │
            └── • union all
                │
                ├── • union all
                │   │
                │   ├── • revscan
                │   │     missing stats
                │   │     table: xyz@zyx
                │   │     spans: (/3.0/NULL - /3.0]
                │   │     limit: 1
                │   │
                │   └── • revscan
                │         missing stats
                │         table: xyz@zyx
                │         spans: (/6.0/NULL - /6.0]
                │         limit: 1
                │
                └── • revscan
                      missing stats
                      table: xyz@zyx
                      spans: (/8.0/NULL - /8.0]
                      limit: 1

# The COUNT disables ReplaceFilteredScalarMinMaxWithSubqueries. The expected
# plan is a single scalar group over a constrained scan of index zyx.
query T
EXPLAIN SELECT min(z), max(y), count(x) FROM xyz WHERE z in (3.0, 6.0, 8.0)
----
distribution: local
vectorized: true
·
• group (scalar)
│
└── • scan
      missing stats
      table: xyz@zyx
      spans: [/3.0 - /3.0] [/6.0 - /6.0] [/8.0 - /8.0]

# Filter on derived table is supported. Both expected subqueries apply the
# filter y > 0 beneath a limit 1: one over a revscan of zyx, the other over a
# scan of xy.
query T
EXPLAIN SELECT max(z), min(x) FROM (SELECT x,y,z FROM xyz a) dt WHERE dt.y > 0
----
distribution: local
vectorized: true
·
• root
│
├── • values
│     size: 2 columns, 1 row
│
├── • subquery
│   │ id: @S1
│   │ original sql: <unknown>
│   │ exec mode: one row
│   │
│   └── • group (scalar)
│       │
│       └── • limit
│           │ count: 1
│           │
│           └── • filter
│               │ filter: y > 0
│               │
│               └── • revscan
│                     missing stats
│                     table: xyz@zyx
│                     spans: (/NULL - ]
│
└── • subquery
    │ id: @S2
    │ original sql: <unknown>
    │ exec mode: one row
    │
    └── • group (scalar)
        │
        └── • limit
            │ count: 1
            │
            └── • filter
                │ filter: y > 0
                │
                └── • scan
                      missing stats
                      table: xyz@xy
                      spans: FULL SCAN (SOFT LIMIT)

# Scalar subquery in filter is supported. The expected plan lists eight
# subqueries; the inner SELECT appears twice (@S3 and @S7) and is referenced by
# the filters inside @S4 and @S8.
query T
EXPLAIN SELECT max(z), min(x) FROM xyz WHERE (z,x) = (SELECT max(z), min(x) FROM xyz)
----
distribution: local
vectorized: true
·
• root
│
├── • values
│     size: 2 columns, 1 row
│
├── • subquery
│   │ id: @S1
│   │ original sql: <unknown>
│   │ exec mode: one row
│   │
│   └── • group (scalar)
│       │
│       └── • revscan
│             missing stats
│             table: xyz@zyx
│             spans: (/NULL - ]
│             limit: 1
│
├── • subquery
│   │ id: @S2
│   │ original sql: <unknown>
│   │ exec mode: one row
│   │
│   └── • group (scalar)
│       │
│       └── • scan
│             missing stats
│             table: xyz@xy
│             spans: LIMITED SCAN
│             limit: 1
│
├── • subquery
│   │ id: @S3
│   │ original sql: (SELECT max(z), min(x) FROM xyz)
│   │ exec mode: one row
│   │
│   └── • render
│       │
│       └── • values
│             size: 2 columns, 1 row
│
├── • subquery
│   │ id: @S4
│   │ original sql: <unknown>
│   │ exec mode: one row
│   │
│   └── • group (scalar)
│       │
│       └── • limit
│           │ count: 1
│           │
│           └── • filter
│               │ filter: (z, x) = @S3
│               │
│               └── • revscan
│                     missing stats
│                     table: xyz@zyx
│                     spans: (/NULL - ]
│
├── • subquery
│   │ id: @S5
│   │ original sql: <unknown>
│   │ exec mode: one row
│   │
│   └── • group (scalar)
│       │
│       └── • revscan
│             missing stats
│             table: xyz@zyx
│             spans: (/NULL - ]
│             limit: 1
│
├── • subquery
│   │ id: @S6
│   │ original sql: <unknown>
│   │ exec mode: one row
│   │
│   └── • group (scalar)
│       │
│       └── • scan
│             missing stats
│             table: xyz@xy
│             spans: LIMITED SCAN
│             limit: 1
│
├── • subquery
│   │ id: @S7
│   │ original sql: (SELECT max(z), min(x) FROM xyz)
│   │ exec mode: one row
│   │
│   └── • render
│       │
│       └── • values
│             size: 2 columns, 1 row
│
└── • subquery
    │ id: @S8
    │ original sql: <unknown>
    │ exec mode: one row
    │
    └── • group (scalar)
        │
        └── • limit
            │ count: 1
            │
            └── • filter
                │ filter: (z, x) = @S7
                │
                └── • scan
                      missing stats
                      table: xyz@xyz_pkey
                      spans: FULL SCAN (SOFT LIMIT)

# VARIANCE/STDDEV

# Compute variance over each column with KV tracing turned on. y is cast to
# DECIMAL before aggregation and the result for z is rounded to 14 decimal
# places; tracing is turned off afterwards.
statement ok
SET tracing = on,kv,results; SELECT variance(x), variance(y::decimal), round(variance(z), 14) FROM xyz; SET tracing = off

# List the 'fetched:' messages and then the 'output row' messages from the
# session trace. The expected output reads xyz_pkey with separate entries for
# the y and z values of each row, and has no y entry for the row whose y is
# NULL.
query T
SELECT message FROM [SHOW KV TRACE FOR SESSION] WITH ORDINALITY
 WHERE message LIKE 'fetched:%' OR message LIKE 'output row%'
 ORDER BY message LIKE 'fetched:%' DESC, ordinality ASC
----
fetched: /xyz/xyz_pkey/1 -> <undecoded>
fetched: /xyz/xyz_pkey/1/y -> 2
fetched: /xyz/xyz_pkey/1/z -> 3.0
fetched: /xyz/xyz_pkey/4 -> <undecoded>
fetched: /xyz/xyz_pkey/4/y -> 5
fetched: /xyz/xyz_pkey/4/z -> 6.0
fetched: /xyz/xyz_pkey/7 -> <undecoded>
fetched: /xyz/xyz_pkey/7/z -> 8.0
output row: [9 4.5 6.33333333333333]

# variance(x) for a single x value: expected to be a scalar group over the
# span /1-/2 of index xy.
query T
EXPLAIN (TYPES) SELECT variance(x) FROM xyz WHERE x = 1
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (variance decimal)
│ estimated row count: 1 (missing stats)
│ aggregate 0: variance(x)
│
└── • scan
      columns: (x int)
      estimated row count: 1 (missing stats)
      table: xyz@xy
      spans: /1-/2

## Tests for the single-row optimization.
# ab has an INT primary key a and an INT column b, each stored in its own
# column family.
statement ok
CREATE TABLE ab (
  a INT PRIMARY KEY,
  b INT,
  FAMILY (a),
  FAMILY (b)
)

# Five rows, with b equal to ten times a.
statement ok
INSERT INTO ab VALUES
  (1, 10),
  (2, 20),
  (3, 30),
  (4, 40),
  (5, 50)

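# NOTE: the tests from here to the next `query` record are commented out and
# are not executed. They use a different directive format (`exec`) from the
# rest of this file.
#exec nodist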
#EXPLAIN (EXPRS) SELECT min(a) FROM abc
#----
#group           ·            ·
# │              aggregate 0  min(a)
# └── render     ·            ·
#      │         render 0     a
#      └── scan  ·            ·
#·               table        abc@primary
#·               spans        ALL
#·               limit        1
#
## Verify we only buffer one row.
#exec
#SELECT message FROM [SHOW KV TRACE FOR SELECT min(a) FROM ab]
# WHERE message LIKE 'fetched:%' OR message LIKE 'output row%'
#----
#fetched: /ab/primary/1 -> NULL
#fetched: /ab/primary/1/b -> 10
#output row: [1]
#
#exec nodist
#EXPLAIN (EXPRS) SELECT max(a) FROM abc
#----
#group              ·            ·
# │                 aggregate 0  max(a)
# └── render        ·            ·
#      │            render 0     a
#      └── revscan  ·            ·
#·                  table        abc@primary
#·                  spans        ALL
#·                  limit        1
#
## Verify we only buffer one row.
#exec
#SELECT message FROM [SHOW KV TRACE FOR SELECT max(a) FROM ab]
# WHERE message LIKE 'fetched:%' OR message LIKE 'output row%'
#----
#fetched: /ab/primary/5/b -> 50
#fetched: /ab/primary/5 -> NULL
#output row: [5]

# count(k) on the primary key column k is planned as count_rows(), so only v is
# read from the scan; the result is then sorted on that count.
query T
EXPLAIN (TYPES) SELECT v, count(k) FROM kv GROUP BY v ORDER BY count(k)
----
distribution: local
vectorized: true
·
• sort
│ columns: (v int, count int)
│ estimated row count: 100 (missing stats)
│ order: +count_rows
│
└── • group (hash)
    │ columns: (v int, count_rows int)
    │ estimated row count: 100 (missing stats)
    │ aggregate 0: count_rows()
    │ group by: v
    │
    └── • scan
          columns: (v int)
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# Same query shape using count(*): the expected plan matches the plan for the
# count(k) query above.
query T
EXPLAIN (TYPES) SELECT v, count(*) FROM kv GROUP BY v ORDER BY count(*)
----
distribution: local
vectorized: true
·
• sort
│ columns: (v int, count int)
│ estimated row count: 100 (missing stats)
│ order: +count_rows
│
└── • group (hash)
    │ columns: (v int, count_rows int)
    │ estimated row count: 100 (missing stats)
    │ aggregate 0: count_rows()
    │ group by: v
    │
    └── • scan
          columns: (v int)
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# count(NULL) in the SELECT list and count(1) in ORDER BY are distinct
# aggregates: count() over a rendered NULL column, and count_rows(). The latter
# is only needed for sorting and is projected away at the top.
query T
EXPLAIN (TYPES) SELECT v, count(NULL) FROM kv GROUP BY v ORDER BY count(1)
----
distribution: local
vectorized: true
·
• project
│ columns: (v int, count int)
│
└── • sort
    │ columns: (v int, count int, count_rows int)
    │ estimated row count: 100 (missing stats)
    │ order: +count_rows
    │
    └── • group (hash)
        │ columns: (v int, count int, count_rows int)
        │ estimated row count: 100 (missing stats)
        │ aggregate 0: count(column10)
        │ aggregate 1: count_rows()
        │ group by: v
        │
        └── • render
            │ columns: (column10 unknown, v int)
            │ render column10: (NULL)[unknown]
            │ render v: (v)[int]
            │
            └── • scan
                  columns: (v int)
                  estimated row count: 1,000 (missing stats)
                  table: kv@kv_pkey
                  spans: FULL SCAN

# Check that filters propagate through no-op aggregation: the outer v > 10
# predicate is evaluated by a filter node directly above the scan, below the
# group node.
query T
EXPLAIN (VERBOSE) SELECT * FROM (SELECT v, count(NULL) FROM kv GROUP BY v) WHERE v > 10
----
distribution: local
vectorized: true
·
• group (hash)
│ columns: (v, count)
│ estimated row count: 33 (missing stats)
│ aggregate 0: count(column10)
│ group by: v
│
└── • render
    │ columns: (column10, v)
    │ render column10: NULL
    │ render v: v
    │
    └── • filter
        │ columns: (v)
        │ estimated row count: 333 (missing stats)
        │ filter: v > 10
        │
        └── • scan
              columns: (v)
              estimated row count: 1,000 (missing stats)
              table: kv@kv_pkey
              spans: FULL SCAN

# Verify that FILTER works.

# Fixture for the aggregate FILTER (WHERE ...) cases that follow.
statement ok
CREATE TABLE filter_test (
  k INT,
  v INT,
  mark BOOL
)

# Check that filter expressions are only rendered once: k > 5 is used by both
# FILTER clauses and as the argument of max(), yet the plan renders it as a
# single column (column10).
query T
EXPLAIN (VERBOSE) SELECT count(*) FILTER (WHERE k>5), max(k>5) FILTER(WHERE k>5) FROM filter_test GROUP BY v
----
distribution: local
vectorized: true
·
• project
│ columns: (count, max)
│
└── • group (hash)
    │ columns: (v, count, max)
    │ estimated row count: 100 (missing stats)
    │ aggregate 0: count(column9) FILTER (WHERE column10)
    │ aggregate 1: max(column10) FILTER (WHERE column10)
    │ group by: v
    │
    └── • render
        │ columns: (column9, column10, v)
        │ render column9: true
        │ render column10: k > 5
        │ render v: v
        │
        └── • scan
              columns: (k, v)
              estimated row count: 1,000 (missing stats)
              table: filter_test@filter_test_pkey
              spans: FULL SCAN

# count(*) with a FILTER clause: planned as count() over a rendered constant
# true column (column9), filtered by the rendered predicate (column10).
query T
EXPLAIN (VERBOSE) SELECT count(*) FILTER (WHERE k > 5) FROM filter_test GROUP BY v
----
distribution: local
vectorized: true
·
• project
│ columns: (count)
│
└── • group (hash)
    │ columns: (v, count)
    │ estimated row count: 100 (missing stats)
    │ aggregate 0: count(column9) FILTER (WHERE column10)
    │ group by: v
    │
    └── • render
        │ columns: (column9, column10, v)
        │ render column9: true
        │ render column10: k > 5
        │ render v: v
        │
        └── • scan
              columns: (k, v)
              estimated row count: 1,000 (missing stats)
              table: filter_test@filter_test_pkey
              spans: FULL SCAN

# Tests with * inside GROUP BY.
# Grouping by kv.* with no aggregates: the expected plan has no group node,
# only a render of the constant over a scan that reads no columns.
query T
EXPLAIN (TYPES) SELECT 1 a FROM kv GROUP BY kv.*;
----
distribution: local
vectorized: true
·
• render
│ columns: (a int)
│ render a: (1)[int]
│
└── • scan
      columns: ()
      estimated row count: 1,000 (missing stats)
      table: kv@kv_pkey
      spans: FULL SCAN

# GROUP BY kv.* above a join: the expected plan groups on k alone.
query T
EXPLAIN (TYPES) SELECT sum(abc.d) FROM kv JOIN abc ON kv.k >= abc.d GROUP BY kv.*;
----
distribution: local
vectorized: true
·
• project
│ columns: (sum decimal)
│
└── • group (hash)
    │ columns: (k int, sum decimal)
    │ estimated row count: 1,000 (missing stats)
    │ aggregate 0: sum(d)
    │ group by: k
    │
    └── • cross join (inner)
        │ columns: (k int, d decimal)
        │ estimated row count: 330,000 (missing stats)
        │ pred: ((k)[int] >= (d)[decimal])[bool]
        │
        ├── • scan
        │     columns: (k int)
        │     estimated row count: 1,000 (missing stats)
        │     table: kv@kv_pkey
        │     spans: FULL SCAN
        │
        └── • scan
              columns: (d decimal)
              estimated row count: 1,000 (missing stats)
              table: abc@abc_pkey
              spans: FULL SCAN

# opt_test is used for tests around the single-row optimization for MIN/MAX.
# It has a secondary index named v on column v.
statement ok
CREATE TABLE opt_test (k INT PRIMARY KEY, v INT, INDEX v(v))

# Verify that we correctly add the v IS NOT NULL constraint (which restricts the
# span): min(v) is planned as any_not_null(v) over a limit-1 scan of index v
# with span /!NULL-.
query T
EXPLAIN (TYPES) SELECT min(v) FROM opt_test
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (min int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(v)
│
└── • scan
      columns: (v int)
      estimated row count: 1 (missing stats)
      table: opt_test@v
      spans: /!NULL-
      limit: 1

# Repeat test when there is an existing filter. The expected plan scans index v
# in +v order and applies the filter and a limit of 1 above the scan.
# TODO(radu): the best plan for this would be to use index v; in this case the scan
# will end early but that is not reflected by the cost.
# NOTE(review): the expected plan below already uses opt_test@v, so this TODO
# looks stale -- confirm and remove.
query T
EXPLAIN (TYPES) SELECT min(v) FROM opt_test WHERE k <> 4
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (min int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(v)
│
└── • project
    │ columns: (v int)
    │
    └── • limit
        │ columns: (k int, v int)
        │ count: (1)[int]
        │
        └── • filter
            │ columns: (k int, v int)
            │ ordering: +v
            │ estimated row count: 333 (missing stats)
            │ filter: ((k)[int] != (4)[int])[bool]
            │
            └── • scan
                  columns: (k int, v int)
                  ordering: +v
                  estimated row count: 990 (missing stats)
                  table: opt_test@v
                  spans: /!NULL-

# Check that the optimization doesn't work when the argument is non-trivial (we
# can't in general guarantee an ordering on a synthesized column).
# The expected plan still uses any_not_null, but it is fed by a top-k (k: 1) on
# the rendered v + 1 column over a full scan of the primary index.
query T
EXPLAIN (TYPES) SELECT min(v+1) FROM opt_test
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (min int)
│ estimated row count: 1 (missing stats)
│ aggregate 0: any_not_null(column7)
│
└── • top-k
    │ columns: (column7 int)
    │ estimated row count: 1 (missing stats)
    │ order: +column7
    │ k: 1
    │
    └── • render
        │ columns: (column7 int)
        │ render column7: ((v)[int] + (1)[int])[int]
        │
        └── • filter
            │ columns: (v int)
            │ estimated row count: 333 (missing stats)
            │ filter: ((((v)[int] + (1)[int])[int]) IS NOT NULL)[bool]
            │
            └── • scan
                  columns: (v int)
                  estimated row count: 1,000 (missing stats)
                  table: opt_test@opt_test_pkey
                  spans: FULL SCAN

# Verify that we don't use the optimization if there is a GROUP BY: min(v) is
# kept as a regular aggregate in a streaming group ordered on k.
query T
EXPLAIN (TYPES) SELECT min(v) FROM opt_test GROUP BY k
----
distribution: local
vectorized: true
·
• project
│ columns: (min int)
│
└── • group (streaming)
    │ columns: (k int, min int)
    │ estimated row count: 1,000 (missing stats)
    │ aggregate 0: min(v)
    │ group by: k
    │ ordered: +k
    │
    └── • scan
          columns: (k int, v int)
          ordering: +k
          estimated row count: 1,000 (missing stats)
          table: opt_test@opt_test_pkey
          spans: FULL SCAN

# xy is cross-joined with ab in the tuple GROUP BY test below.
statement ok
CREATE TABLE xy(x STRING, y STRING);

# GROUP BY a tuple containing the primary key column a: the expected plan has no
# group node, just a render of the (b, a) tuple over a full scan.
query T
EXPLAIN (TYPES) SELECT (b, a) r FROM ab GROUP BY (b, a)
----
distribution: local
vectorized: true
·
• render
│ columns: (r tuple{int, int})
│ render r: (((b)[int], (a)[int]))[tuple{int, int}]
│
└── • scan
      columns: (a int, b int)
      estimated row count: 1,000 (missing stats)
      table: ab@ab_pkey
      spans: FULL SCAN

# GROUP BY a nested tuple (x, (a, b)): the expected plan groups on a and x only
# and carries b through any_not_null(b).
query T
EXPLAIN (TYPES) SELECT min(y), (b, a) r FROM ab, xy GROUP BY (x, (a, b))
----
distribution: local
vectorized: true
·
• render
│ columns: (min string, r tuple{int, int})
│ render r: (((any_not_null)[int], (a)[int]))[tuple{int, int}]
│ render min: (min)[string]
│
└── • group (hash)
    │ columns: (a int, x string, min string, any_not_null int)
    │ estimated row count: 100,000 (missing stats)
    │ aggregate 0: min(y)
    │ aggregate 1: any_not_null(b)
    │ group by: a, x
    │
    └── • cross join (inner)
        │ columns: (a int, b int, x string, y string)
        │ estimated row count: 1,000,000 (missing stats)
        │
        ├── • scan
        │     columns: (a int, b int)
        │     estimated row count: 1,000 (missing stats)
        │     table: ab@ab_pkey
        │     spans: FULL SCAN
        │
        └── • scan
              columns: (x string, y string)
              estimated row count: 1,000 (missing stats)
              table: xy@xy_pkey
              spans: FULL SCAN

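# Test that ordering on GROUP BY columns is maintained.
# TODO(radu): Derive GROUP BY ordering in physicalPropsBuilder.
# NOTE: every case in this section (through the merge-join case) is currently
# commented out, so nothing here is executed.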
#exec-raw
#CREATE TABLE group_ord (
#  x INT PRIMARY KEY,
#  y INT,
#  z INT,
#  INDEX foo(z)
#)
#----
#
## The ordering is on all the GROUP BY columns, and isn't preserved after the
## aggregation.
#exec hide-colnames nodist
#EXPLAIN (TYPES) SELECT x, max(y) FROM group_ord GROUP BY x
#----
#group      ·            ·                  (x, max)  ·
# │         aggregate 0  x                  ·         ·
# │         aggregate 1  max(y)             ·         ·
# │         group by     @1                 ·         ·
# └── scan  ·            ·                  (x, y)    ·
#·          table        group_ord@primary  ·         ·
#·          spans        ALL                ·         ·
#
## The ordering is on all the GROUP BY columns, and is preserved after the
## aggregation.
#exec hide-colnames nodist
#EXPLAIN (TYPES) SELECT x, max(y) FROM group_ord GROUP BY x ORDER BY x
#----
#sort            ·            ·                  (x, max)  +x
# │              order        +x                 ·         ·
# └── group      ·            ·                  (x, max)  ·
#      │         aggregate 0  x                  ·         ·
#      │         aggregate 1  max(y)             ·         ·
#      │         group by     @1                 ·         ·
#      └── scan  ·            ·                  (x, y)    ·
#·               table        group_ord@primary  ·         ·
#·               spans        ALL                ·         ·
#
## The ordering is on some of the GROUP BY columns, and isn't preserved after
## the aggregation.
#exec hide-colnames nodist
#EXPLAIN (TYPES) SELECT z, x, max(y) FROM group_ord GROUP BY x, z
#----
#render          ·            ·                  (z, x, max)   ·
# │              render 0     z                  ·             ·
# │              render 1     x                  ·             ·
# │              render 2     agg0               ·             ·
# └── group      ·            ·                  (x, z, agg0)  ·
#      │         aggregate 0  x                  ·             ·
#      │         aggregate 1  z                  ·             ·
#      │         aggregate 2  max(y)             ·             ·
#      │         group by     @1,@3              ·             ·
#      └── scan  ·            ·                  (x, y, z)     ·
#·               table        group_ord@primary  ·             ·
#·               spans        ALL                ·             ·
#
## The ordering is on some of the GROUP BY columns, and is preserved after
## the aggregation.
#exec hide-colnames nodist
#EXPLAIN (TYPES) SELECT z, x, max(y) FROM group_ord GROUP BY x, z ORDER BY x
#----
#render               ·            ·                  (z, x, max)   ·
# │                   render 0     z                  ·             ·
# │                   render 1     x                  ·             ·
# │                   render 2     agg0               ·             ·
# └── sort            ·            ·                  (x, z, agg0)  +x
#      │              order        +x                 ·             ·
#      └── group      ·            ·                  (x, z, agg0)  ·
#           │         aggregate 0  x                  ·             ·
#           │         aggregate 1  z                  ·             ·
#           │         aggregate 2  max(y)             ·             ·
#           │         group by     @1,@3              ·             ·
#           └── scan  ·            ·                  (x, y, z)     ·
#·                    table        group_ord@primary  ·             ·
#·                    spans        ALL                ·             ·
#
## If the underlying ordering isn't from the primary index, it needs to be hinted
## for now.
#exec hide-colnames nodist
#EXPLAIN (TYPES) SELECT z, max(y) FROM group_ord@foo GROUP BY z
#----
#group      ·            ·                  (z, max)  ·
# │         aggregate 0  z                  ·         ·
# │         aggregate 1  max(y)             ·         ·
# │         group by     @2                 ·         ·
# └── scan  ·            ·                  (y, z)    ·
#·          table        group_ord@primary  ·         ·
#·          spans        ALL                ·         ·
#
## Test that a merge join is used on two aggregate subqueries with orderings on
## the GROUP BY columns. Note that an ORDER BY is not necessary on the
## subqueries.
#exec hide-colnames nodist
#EXPLAIN (TYPES) SELECT * FROM (SELECT x, max(y) FROM group_ord GROUP BY x) JOIN (SELECT z, min(y) FROM group_ord@foo GROUP BY z) ON x = z
#----
#join            ·            ·                  (x, max, z, min)  ·
# │              type         inner              ·                 ·
# │              equality     (x) = (z)          ·                 ·
# ├── group      ·            ·                  (x, agg0)         ·
# │    │         aggregate 0  x                  ·                 ·
# │    │         aggregate 1  max(y)             ·                 ·
# │    │         group by     @1                 ·                 ·
# │    └── scan  ·            ·                  (x, y)            ·
# │              table        group_ord@primary  ·                 ·
# │              spans        ALL                ·                 ·
# └── group      ·            ·                  (z, agg0)         ·
#      │         aggregate 0  z                  ·                 ·
#      │         aggregate 1  min(y)             ·                 ·
#      │         group by     @2                 ·                 ·
#      └── scan  ·            ·                  (y, z)            ·
#·               table        group_ord@primary  ·                 ·
#·               spans        ALL                ·                 ·

# Regression test for #25533 (crash when propagating filter through GROUP BY).
# The HAVING predicate on the grouping expression w::DECIMAL is expected to be
# applied by a filter node below the distinct node.
query T
EXPLAIN (TYPES) SELECT 1 a FROM kv GROUP BY v, w::DECIMAL HAVING w::DECIMAL > 1;
----
distribution: local
vectorized: true
·
• render
│ columns: (a int)
│ render a: (1)[int]
│
└── • distinct
    │ columns: (column10 decimal, v int)
    │ estimated row count: 333 (missing stats)
    │ distinct on: column10, v
    │
    └── • filter
        │ columns: (column10 decimal, v int)
        │ estimated row count: 333 (missing stats)
        │ filter: ((column10)[decimal] > (1)[decimal])[bool]
        │
        └── • render
            │ columns: (column10 decimal, v int)
            │ render column10: ((w)[int]::DECIMAL)[decimal]
            │ render v: (v)[int]
            │
            └── • scan
                  columns: (v int, w int)
                  estimated row count: 1,000 (missing stats)
                  table: kv@kv_pkey
                  spans: FULL SCAN

# foo is the fixture for the ordinal column reference (@N) tests below.
statement ok
CREATE TABLE foo(a INT, b CHAR)

# Enable @N ordinal column references for the next two queries; the setting is
# turned off again after them.
statement ok
SET allow_ordinal_column_references=true

# Check that GROUP BY picks up column ordinals: @1 resolves to column a, which
# is rendered as the grouping column (column9).
query T
EXPLAIN (VERBOSE) SELECT min(a) AS m FROM foo GROUP BY @1
----
distribution: local
vectorized: true
·
• project
│ columns: (m)
│
└── • group (hash)
    │ columns: (column9, min)
    │ estimated row count: 100 (missing stats)
    │ aggregate 0: min(a)
    │ group by: column9
    │
    └── • render
        │ columns: (column9, a)
        │ render column9: a
        │ render a: a
        │
        └── • scan
              columns: (a)
              estimated row count: 1,000 (missing stats)
              table: foo@foo_pkey
              spans: FULL SCAN

# @2 resolves to column b: the grouping column (column9) is rendered from b
# while min() still aggregates a.
query T
EXPLAIN (VERBOSE) SELECT min(a) AS m FROM foo GROUP BY @2
----
distribution: local
vectorized: true
·
• project
│ columns: (m)
│
└── • group (hash)
    │ columns: (column9, min)
    │ estimated row count: 100 (missing stats)
    │ aggregate 0: min(a)
    │ group by: column9
    │
    └── • render
        │ columns: (column9, a)
        │ render column9: b
        │ render a: a
        │
        └── • scan
              columns: (a, b)
              estimated row count: 1,000 (missing stats)
              table: foo@foo_pkey
              spans: FULL SCAN

# Restore the default now that the ordinal-reference tests are done. RESET is
# used rather than hard-coding false so the session returns to whatever the
# default is.
statement ok
RESET allow_ordinal_column_references

# The subquery's ORDER BY v is kept for array_agg: a sort on +v feeds the
# scalar group.
query T
EXPLAIN (VERBOSE) SELECT array_agg(v) FROM (SELECT * FROM kv ORDER BY v)
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (array_agg)
│ estimated row count: 1 (missing stats)
│ aggregate 0: array_agg(v)
│
└── • sort
    │ columns: (v)
    │ estimated row count: 1,000 (missing stats)
    │ order: +v
    │
    └── • scan
          columns: (v)
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# array_cat_agg over the array column with the subquery ordered by v: the sort
# on +v is kept below the scalar group.
query T
EXPLAIN (VERBOSE) SELECT array_cat_agg(arr) FROM (SELECT * FROM kv ORDER BY v)
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (array_cat_agg)
│ estimated row count: 1 (missing stats)
│ aggregate 0: array_cat_agg(arr)
│
└── • sort
    │ columns: (v, arr)
    │ estimated row count: 1,000 (missing stats)
    │ order: +v
    │
    └── • scan
          columns: (v, arr)
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# Plain ORDER BY without aggregation: sort on +s, then project away s.
query T
EXPLAIN (VERBOSE) SELECT k FROM kv ORDER BY s
----
distribution: local
vectorized: true
·
• project
│ columns: (k)
│
└── • sort
    │ columns: (k, s)
    │ estimated row count: 1,000 (missing stats)
    │ order: +s
    │
    └── • scan
          columns: (k, s)
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# The subquery's ORDER BY k is satisfied by the +k ordering of the primary index
# scan, so no sort node appears below concat_agg.
query T
EXPLAIN (VERBOSE) SELECT concat_agg(s) FROM (SELECT s FROM kv ORDER BY k)
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (concat_agg)
│ estimated row count: 1 (missing stats)
│ aggregate 0: concat_agg(s)
│
└── • scan
      columns: (k, s)
      ordering: +k
      estimated row count: 1,000 (missing stats)
      table: kv@kv_pkey
      spans: FULL SCAN

# Ordering the subquery by s requires an explicit sort on +s below
# array_agg(k).
query T
EXPLAIN (VERBOSE) SELECT array_agg(k) FROM (SELECT k FROM kv ORDER BY s)
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (array_agg)
│ estimated row count: 1 (missing stats)
│ aggregate 0: array_agg(k)
│
└── • sort
    │ columns: (k, s)
    │ estimated row count: 1,000 (missing stats)
    │ order: +s
    │
    └── • scan
          columns: (k, s)
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# The ',' delimiter is rendered as a separate column (column10) and the +k
# ordering from the scan is carried through the render node.
query T
EXPLAIN (VERBOSE) SELECT string_agg(s, ',') FROM (SELECT s FROM kv ORDER BY k)
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (string_agg)
│ estimated row count: 1 (missing stats)
│ aggregate 0: string_agg(s, column10)
│
└── • render
    │ columns: (column10, k, s)
    │ ordering: +k
    │ render column10: ','
    │ render k: k
    │ render s: s
    │
    └── • scan
          columns: (k, s)
          ordering: +k
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# Verify that we project away all input columns for count(*): a project with an
# empty column list sits between the hash join and the scalar group.
query T
EXPLAIN (VERBOSE) SELECT count(*) FROM xyz JOIN kv ON y=v
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (count)
│ estimated row count: 1 (missing stats)
│ aggregate 0: count_rows()
│
└── • project
    │ columns: ()
    │
    └── • hash join (inner)
        │ columns: (y, v)
        │ estimated row count: 9,801 (missing stats)
        │ equality: (y) = (v)
        │
        ├── • scan
        │     columns: (y)
        │     estimated row count: 1,000 (missing stats)
        │     table: xyz@xy
        │     spans: FULL SCAN
        │
        └── • scan
              columns: (v)
              estimated row count: 1,000 (missing stats)
              table: kv@kv_pkey
              spans: FULL SCAN


# Regression test for #31882: make sure we don't incorrectly advertise an
# ordering of +w at the scan node.
# uvw has a secondary index, also named uvw, on (u, v, w).
statement ok
CREATE TABLE uvw (u INT, v INT, w INT, INDEX uvw(u, v, w))

# The scan of index uvw advertises +u,+v,+w, and the group is streaming on
# +u,+v; no separate sort on w is planned.
query T
EXPLAIN (VERBOSE) SELECT u, v, array_agg(w) AS s FROM (SELECT * FROM uvw ORDER BY w) GROUP BY u, v
----
distribution: local
vectorized: true
·
• group (streaming)
│ columns: (u, v, s)
│ estimated row count: 1,000 (missing stats)
│ aggregate 0: array_agg(w)
│ group by: u, v
│ ordered: +u,+v
│
└── • scan
      columns: (u, v, w)
      ordering: +u,+v,+w
      estimated row count: 1,000 (missing stats)
      table: uvw@uvw
      spans: FULL SCAN

# string_agg with a constant delimiter and no grouping: the delimiter is
# rendered as column10 and passed as the second aggregate argument.
query T
EXPLAIN (VERBOSE) SELECT string_agg(s, ', ') FROM kv
----
distribution: local
vectorized: true
·
• group (scalar)
│ columns: (string_agg)
│ estimated row count: 1 (missing stats)
│ aggregate 0: string_agg(s, column10)
│
└── • render
    │ columns: (column10, s)
    │ render column10: ', '
    │ render s: s
    │
    └── • scan
          columns: (s)
          estimated row count: 1,000 (missing stats)
          table: kv@kv_pkey
          spans: FULL SCAN

# Fixture for the grouped string_agg tests below (STRING and BYTES inputs,
# constant and NULL delimiters).
statement ok
CREATE TABLE string_agg_test (
  id INT PRIMARY KEY,
  company_id INT,
  employee STRING
)

# STRING input with a ',' delimiter: the delimiter is rendered as column8.
query T
EXPLAIN (VERBOSE)
    SELECT
        company_id, string_agg(employee, ',')
    FROM
        string_agg_test
    GROUP BY
        company_id
    ORDER BY
        company_id
----
distribution: local
vectorized: true
·
• sort
│ columns: (company_id, string_agg)
│ estimated row count: 100 (missing stats)
│ order: +company_id
│
└── • group (hash)
    │ columns: (company_id, string_agg)
    │ estimated row count: 100 (missing stats)
    │ aggregate 0: string_agg(employee, column8)
    │ group by: company_id
    │
    └── • render
        │ columns: (column8, company_id, employee)
        │ render column8: ','
        │ render company_id: company_id
        │ render employee: employee
        │
        └── • scan
              columns: (company_id, employee)
              estimated row count: 1,000 (missing stats)
              table: string_agg_test@string_agg_test_pkey
              spans: FULL SCAN

# BYTES input with a b',' delimiter: the cast is rendered as column8 and the
# delimiter as column9 (shown as '\x2c').
query T
EXPLAIN (VERBOSE)
    SELECT
        company_id, string_agg(employee::BYTES, b',')
    FROM
        string_agg_test
    GROUP BY
        company_id
    ORDER BY
        company_id
----
distribution: local
vectorized: true
·
• sort
│ columns: (company_id, string_agg)
│ estimated row count: 100 (missing stats)
│ order: +company_id
│
└── • group (hash)
    │ columns: (company_id, string_agg)
    │ estimated row count: 100 (missing stats)
    │ aggregate 0: string_agg(column8, column9)
    │ group by: company_id
    │
    └── • render
        │ columns: (column8, column9, company_id)
        │ render column8: employee::BYTES
        │ render column9: '\x2c'
        │ render company_id: company_id
        │
        └── • scan
              columns: (company_id, employee)
              estimated row count: 1,000 (missing stats)
              table: string_agg_test@string_agg_test_pkey
              spans: FULL SCAN

# STRING input with a NULL delimiter: NULL is rendered as column8.
query T
EXPLAIN (VERBOSE)
    SELECT
        company_id, string_agg(employee, NULL)
    FROM
        string_agg_test
    GROUP BY
        company_id
    ORDER BY
        company_id
----
distribution: local
vectorized: true
·
• sort
│ columns: (company_id, string_agg)
│ estimated row count: 100 (missing stats)
│ order: +company_id
│
└── • group (hash)
    │ columns: (company_id, string_agg)
    │ estimated row count: 100 (missing stats)
    │ aggregate 0: string_agg(employee, column8)
    │ group by: company_id
    │
    └── • render
        │ columns: (column8, company_id, employee)
        │ render column8: NULL
        │ render company_id: company_id
        │ render employee: employee
        │
        └── • scan
              columns: (company_id, employee)
              estimated row count: 1,000 (missing stats)
              table: string_agg_test@string_agg_test_pkey
              spans: FULL SCAN

# BYTES input with a NULL delimiter: the cast is rendered as column8 and NULL as
# column9.
query T
EXPLAIN (VERBOSE)
    SELECT
        company_id, string_agg(employee::BYTES, NULL)
    FROM
        string_agg_test
    GROUP BY
        company_id
    ORDER BY
        company_id
----
distribution: local
vectorized: true
·
• sort
│ columns: (company_id, string_agg)
│ estimated row count: 100 (missing stats)
│ order: +company_id
│
└── • group (hash)
    │ columns: (company_id, string_agg)
    │ estimated row count: 100 (missing stats)
    │ aggregate 0: string_agg(column8, column9)
    │ group by: company_id
    │
    └── • render
        │ columns: (column8, column9, company_id)
        │ render column8: employee::BYTES
        │ render column9: NULL
        │ render company_id: company_id
        │
        └── • scan
              columns: (company_id, employee)
              estimated row count: 1,000 (missing stats)
              table: string_agg_test@string_agg_test_pkey
              spans: FULL SCAN

# Regression test for correctly marking partially streaming aggregation
# (#124101).
# t124101 has a primary key on (a, b) and a unique constraint on (a, c).
statement ok
CREATE TABLE t124101 (
  a INT,
  b INT,
  c INT,
  PRIMARY KEY (a, b),
  UNIQUE (a, c)
);

# Kept as its own record so that a failure is attributed to the right
# statement. The override is undone by the RESET after the EXPLAIN below.
statement ok
SET testing_optimizer_disable_rule_probability = 1.000000;

# The ORDER BY lists a twice. The group node is expected to be reported as
# partial streaming, ordered on +a,+a.
query T
EXPLAIN SELECT a, sum_int(c) AS s FROM t124101 GROUP BY a, c ORDER BY a, a, s, c LIMIT 2;
----
distribution: local
vectorized: true
·
• limit
│ count: 2
│
└── • sort
    │ order: +a,+a,+sum_int,+c
    │ already ordered: +a,+a
    │
    └── • group (partial streaming)
        │ group by: a, c
        │ ordered: +a,+a
        │
        └── • scan
              missing stats
              table: t124101@t124101_pkey
              spans: FULL SCAN

# Undo the testing_optimizer_disable_rule_probability override set before the
# EXPLAIN above.
statement ok
RESET testing_optimizer_disable_rule_probability;
