# Test table for the TopK statistics tests below: a is the primary key, b has
# a secondary index, and c is unindexed.
exec-ddl
CREATE TABLE abc (a INT PRIMARY KEY, b INT, c INT, INDEX (b))
----

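# Inject statistics matching the data produced by:
# INSERT INTO abc SELECT i, i % 10, i % 2 FROM generate_series(0, 19) s(i)
# That is, 20 rows with 20 distinct values of a, 10 of b (2 rows each), and
# 2 of c (10 rows each); no column contains NULLs.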
exec-ddl
ALTER TABLE abc INJECT STATISTICS '[
      {
          "avg_size": 1,
          "columns": [
              "a"
          ],
          "created_at": "2024-03-14 23:28:28.157251",
          "distinct_count": 20,
          "histo_buckets": [
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "0"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "1"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "2"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "3"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "4"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "5"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "6"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "7"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "8"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "9"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "10"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "11"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "12"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "13"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "14"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "15"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "16"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "17"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "18"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "19"
              }
          ],
          "histo_col_type": "INT8",
          "histo_version": 3,
          "null_count": 0,
          "row_count": 20
      },
      {
          "avg_size": 2,
          "columns": [
              "b"
          ],
          "created_at": "2024-03-14 23:28:28.157251",
          "distinct_count": 10,
          "histo_buckets": [
              {
                  "distinct_range": 0,
                  "num_eq": 2,
                  "num_range": 0,
                  "upper_bound": "0"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 2,
                  "num_range": 0,
                  "upper_bound": "1"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 2,
                  "num_range": 0,
                  "upper_bound": "2"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 2,
                  "num_range": 0,
                  "upper_bound": "3"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 2,
                  "num_range": 0,
                  "upper_bound": "4"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 2,
                  "num_range": 0,
                  "upper_bound": "5"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 2,
                  "num_range": 0,
                  "upper_bound": "6"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 2,
                  "num_range": 0,
                  "upper_bound": "7"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 2,
                  "num_range": 0,
                  "upper_bound": "8"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 2,
                  "num_range": 0,
                  "upper_bound": "9"
              }
          ],
          "histo_col_type": "INT8",
          "histo_version": 3,
          "null_count": 0,
          "row_count": 20
      },
      {
          "avg_size": 2,
          "columns": [
              "c"
          ],
          "created_at": "2024-03-14 23:28:28.157251",
          "distinct_count": 2,
          "histo_buckets": [
              {
                  "distinct_range": 0,
                  "num_eq": 10,
                  "num_range": 0,
                  "upper_bound": "0"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 10,
                  "num_range": 0,
                  "upper_bound": "1"
              }
          ],
          "histo_col_type": "INT8",
          "histo_version": 3,
          "null_count": 0,
          "row_count": 20
      }
]'
----

# TopK is currently only added by exploration rules. To test buildTopK and
# colStatTopK, we must directly construct (with the expr directive) plans that
# would usually be arrived at through exploration.

# SELECT * FROM abc ORDER BY c LIMIT 4
# Baseline case: TopK (k=4, ordering +c) directly over a full scan. No colstat
# option is given, so the expected output reports only row counts.
expr
(TopK
  (Scan [(Table "abc") (Cols "a,b,c")])
  [(K 4) (Ordering (OrderingChoice "+c"))])
----
top-k
 ├── columns: a:1(int!null) b:2(int) c:3(int)
 ├── internal-ordering: +3
 ├── k: 4
 ├── cardinality: [0 - 4]
 ├── stats: [rows=4]
 ├── key: (1)
 ├── fd: (1)-->(2,3)
 └── scan abc
      ├── columns: a:1(int!null) b:2(int) c:3(int)
      ├── stats: [rows=20]
      ├── key: (1)
      └── fd: (1)-->(2,3)

# SELECT * FROM abc ORDER BY c, a LIMIT 4
# Requests column statistics for a (1), b (2), and c (3) via colstat, so the
# TopK's per-column distinct and null counts appear in the expected output.
expr colstat=1 colstat=2 colstat=3
(TopK
  (Scan [(Table "abc") (Cols "a,b,c")])
  [(K 4) (Ordering (OrderingChoice "+c,+a"))])
----
top-k
 ├── columns: a:1(int!null) b:2(int) c:3(int)
 ├── internal-ordering: +3,+1
 ├── k: 4
 ├── cardinality: [0 - 4]
 ├── stats: [rows=4, distinct(1)=4, null(1)=0, distinct(2)=3.6, null(2)=0, distinct(3)=1.78525, null(3)=0]
 ├── key: (1)
 ├── fd: (1)-->(2,3)
 └── scan abc
      ├── columns: a:1(int!null) b:2(int) c:3(int)
      ├── stats: [rows=20, distinct(1)=20, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=2, null(3)=0]
      │   histogram(1)=  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1
      │                <--- 0 --- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9 --- 10 --- 11 --- 12 --- 13 --- 14 --- 15 --- 16 --- 17 --- 18 --- 19
      │   histogram(2)=  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2
      │                <--- 0 --- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9
      │   histogram(3)=  0 10  0 10
      │                <--- 0 --- 1
      ├── key: (1)
      └── fd: (1)-->(2,3)

# SELECT FROM abc ORDER BY c, b LIMIT 4
# The select list is empty: the scan is built with no output columns
# (Cols ""), while the TopK still orders by c, b.
expr
(TopK
  (Scan [(Table "abc") (Cols "")])
  [(K 4) (Ordering (OrderingChoice "+c,+b"))])
----
top-k
 ├── internal-ordering: +3,+2
 ├── k: 4
 ├── cardinality: [0 - 4]
 ├── stats: [rows=4]
 └── scan abc
      └── stats: [rows=20]

# SELECT DISTINCT c FROM (SELECT * FROM abc ORDER BY c, a LIMIT 4)
# No colstat option is given; the expected output still shows distinct(3) on
# the TopK, presumably requested by the DistinctOn grouping on c.
expr
(DistinctOn
  (TopK
    (Scan [(Table "abc") (Cols "a,b,c")])
    [(K 4) (Ordering (OrderingChoice "+c,+a"))])
  []
  [(GroupingCols "c")])
----
distinct-on
 ├── columns: c:3(int)
 ├── grouping columns: c:3(int)
 ├── cardinality: [0 - 4]
 ├── stats: [rows=1.785252, distinct(3)=1.78525, null(3)=0]
 ├── key: (3)
 └── top-k
      ├── columns: a:1(int!null) b:2(int) c:3(int)
      ├── internal-ordering: +3,+1
      ├── k: 4
      ├── cardinality: [0 - 4]
      ├── stats: [rows=4, distinct(3)=1.78525, null(3)=0]
      ├── key: (1)
      ├── fd: (1)-->(2,3)
      └── scan abc
           ├── columns: a:1(int!null) b:2(int) c:3(int)
           ├── stats: [rows=20, distinct(3)=2, null(3)=0]
           │   histogram(3)=  0 10  0 10
           │                <--- 0 --- 1
           ├── key: (1)
           └── fd: (1)-->(2,3)

# SELECT DISTINCT a, b FROM (SELECT * FROM abc ORDER BY c, a LIMIT 4)
# Multi-column variant: grouping on (a, b) surfaces the combined
# distinct(1,2) statistic on the TopK in the expected output.
expr
(DistinctOn
  (TopK
    (Scan [(Table "abc") (Cols "a,b,c")])
    [(K 4) (Ordering (OrderingChoice "+c,+a"))])
  []
  [(GroupingCols "a,b")])
----
distinct-on
 ├── columns: a:1(int!null) b:2(int)
 ├── grouping columns: a:1(int!null) b:2(int)
 ├── cardinality: [0 - 4]
 ├── stats: [rows=4, distinct(1,2)=4, null(1,2)=0]
 ├── key: (1)
 ├── fd: (1)-->(2)
 └── top-k
      ├── columns: a:1(int!null) b:2(int) c:3(int)
      ├── internal-ordering: +3,+1
      ├── k: 4
      ├── cardinality: [0 - 4]
      ├── stats: [rows=4, distinct(1,2)=4, null(1,2)=0]
      ├── key: (1)
      ├── fd: (1)-->(2,3)
      └── scan abc
           ├── columns: a:1(int!null) b:2(int) c:3(int)
           ├── stats: [rows=20, distinct(1,2)=20, null(1,2)=0]
           ├── key: (1)
           └── fd: (1)-->(2,3)

# SELECT * FROM abc WHERE b > 5 ORDER BY c, a LIMIT 4
# The TopK input is a Select (b > 5) rather than a bare Scan; colstat requests
# statistics for a (1), b (2), and c (3).
expr colstat=1 colstat=2 colstat=3
(TopK
  (Select
    (Scan [(Table "abc") (Cols "a,b,c")])
    [(Gt (Var "b") (Const 5 "int"))])
  [(K 4) (Ordering (OrderingChoice "+c,+a"))])
----
top-k
 ├── columns: a:1(int!null) b:2(int!null) c:3(int)
 ├── internal-ordering: +3,+1
 ├── k: 4
 ├── cardinality: [0 - 4]
 ├── stats: [rows=4, distinct(1)=4, null(1)=0, distinct(2)=3, null(2)=0, distinct(3)=1.86574, null(3)=0]
 ├── key: (1)
 ├── fd: (1)-->(2,3)
 └── select
      ├── columns: a:1(int!null) b:2(int!null) c:3(int)
      ├── stats: [rows=8, distinct(1)=8, null(1)=0, distinct(2)=4, null(2)=0, distinct(3)=1.98791, null(3)=0]
      │   histogram(2)=  0  2  0  2  0  2  0  2
      │                <--- 6 --- 7 --- 8 --- 9
      ├── key: (1)
      ├── fd: (1)-->(2,3)
      ├── scan abc
      │    ├── columns: a:1(int!null) b:2(int) c:3(int)
      │    ├── stats: [rows=20, distinct(1)=20, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=2, null(3)=0]
      │    │   histogram(1)=  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1
      │    │                <--- 0 --- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9 --- 10 --- 11 --- 12 --- 13 --- 14 --- 15 --- 16 --- 17 --- 18 --- 19
      │    │   histogram(2)=  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2
      │    │                <--- 0 --- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9
      │    │   histogram(3)=  0 10  0 10
      │    │                <--- 0 --- 1
      │    ├── key: (1)
      │    └── fd: (1)-->(2,3)
      └── filters
           └── b:2 > 5 [type=bool, outer=(2), constraints=(/2: [/6 - ]; tight)]

# SELECT DISTINCT c FROM (SELECT * FROM abc WHERE b > 5 ORDER BY c, a LIMIT 4)
# DistinctOn grouping on c over a TopK whose input is a filtered Select
# (b > 5); no colstat option is given.
expr
(DistinctOn
  (TopK
    (Select
      (Scan [(Table "abc") (Cols "a,b,c")])
      [(Gt (Var "b") (Const 5 "int"))])
    [(K 4) (Ordering (OrderingChoice "+c,+a"))])
  []
  [(GroupingCols "c")])
----
distinct-on
 ├── columns: c:3(int)
 ├── grouping columns: c:3(int)
 ├── cardinality: [0 - 4]
 ├── stats: [rows=1.865741, distinct(3)=1.86574, null(3)=0]
 ├── key: (3)
 └── top-k
      ├── columns: a:1(int!null) b:2(int!null) c:3(int)
      ├── internal-ordering: +3,+1
      ├── k: 4
      ├── cardinality: [0 - 4]
      ├── stats: [rows=4, distinct(3)=1.86574, null(3)=0]
      ├── key: (1)
      ├── fd: (1)-->(2,3)
      └── select
           ├── columns: a:1(int!null) b:2(int!null) c:3(int)
           ├── stats: [rows=8, distinct(2)=4, null(2)=0, distinct(3)=1.98791, null(3)=0]
           │   histogram(2)=  0  2  0  2  0  2  0  2
           │                <--- 6 --- 7 --- 8 --- 9
           ├── key: (1)
           ├── fd: (1)-->(2,3)
           ├── scan abc
           │    ├── columns: a:1(int!null) b:2(int) c:3(int)
           │    ├── stats: [rows=20, distinct(1)=20, null(1)=0, distinct(2)=10, null(2)=0, distinct(3)=2, null(3)=0]
           │    │   histogram(1)=  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1
           │    │                <--- 0 --- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9 --- 10 --- 11 --- 12 --- 13 --- 14 --- 15 --- 16 --- 17 --- 18 --- 19
           │    │   histogram(2)=  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2
           │    │                <--- 0 --- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9
           │    │   histogram(3)=  0 10  0 10
           │    │                <--- 0 --- 1
           │    ├── key: (1)
           │    └── fd: (1)-->(2,3)
           └── filters
                └── b:2 > 5 [type=bool, outer=(2), constraints=(/2: [/6 - ]; tight)]

# SELECT DISTINCT a, b FROM (SELECT * FROM abc WHERE b > 5 ORDER BY c, a LIMIT 4)
# DistinctOn grouping on (a, b) over a TopK whose input is a filtered Select
# (b > 5); the expected output shows the multi-column distinct(1,2) statistic.
expr
(DistinctOn
  (TopK
    (Select
      (Scan [(Table "abc") (Cols "a,b,c")])
      [(Gt (Var "b") (Const 5 "int"))])
    [(K 4) (Ordering (OrderingChoice "+c,+a"))])
  []
  [(GroupingCols "a,b")])
----
distinct-on
 ├── columns: a:1(int!null) b:2(int!null)
 ├── grouping columns: a:1(int!null) b:2(int!null)
 ├── cardinality: [0 - 4]
 ├── stats: [rows=4, distinct(1,2)=4, null(1,2)=0]
 ├── key: (1)
 ├── fd: (1)-->(2)
 └── top-k
      ├── columns: a:1(int!null) b:2(int!null) c:3(int)
      ├── internal-ordering: +3,+1
      ├── k: 4
      ├── cardinality: [0 - 4]
      ├── stats: [rows=4, distinct(1,2)=4, null(1,2)=0]
      ├── key: (1)
      ├── fd: (1)-->(2,3)
      └── select
           ├── columns: a:1(int!null) b:2(int!null) c:3(int)
           ├── stats: [rows=8, distinct(2)=4, null(2)=0, distinct(1,2)=8, null(1,2)=0]
           │   histogram(2)=  0  2  0  2  0  2  0  2
           │                <--- 6 --- 7 --- 8 --- 9
           ├── key: (1)
           ├── fd: (1)-->(2,3)
           ├── scan abc
           │    ├── columns: a:1(int!null) b:2(int) c:3(int)
           │    ├── stats: [rows=20, distinct(1)=20, null(1)=0, distinct(2)=10, null(2)=0, distinct(1,2)=20, null(1,2)=0]
           │    │   histogram(1)=  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1
           │    │                <--- 0 --- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9 --- 10 --- 11 --- 12 --- 13 --- 14 --- 15 --- 16 --- 17 --- 18 --- 19
           │    │   histogram(2)=  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2
           │    │                <--- 0 --- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9
           │    ├── key: (1)
           │    └── fd: (1)-->(2,3)
           └── filters
                └── b:2 > 5 [type=bool, outer=(2), constraints=(/2: [/6 - ]; tight)]

# SELECT * FROM abc WHERE b IS NOT NULL ORDER BY c, a LIMIT 4
# The filter is built with IsNot against a typed NULL, which the expected
# output prints as b IS DISTINCT FROM CAST(NULL AS INT8).
expr
(TopK
  (Select
    (Scan [(Table "abc") (Cols "a,b,c")])
    [(IsNot (Var "b") (Null "int"))])
  [(K 4) (Ordering (OrderingChoice "+c,+a"))])
----
top-k
 ├── columns: a:1(int!null) b:2(int!null) c:3(int)
 ├── internal-ordering: +3,+1
 ├── k: 4
 ├── cardinality: [0 - 4]
 ├── stats: [rows=4]
 ├── key: (1)
 ├── fd: (1)-->(2,3)
 └── select
      ├── columns: a:1(int!null) b:2(int!null) c:3(int)
      ├── stats: [rows=20, distinct(2)=10, null(2)=0]
      │   histogram(2)=  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2
      │                <--- 0 --- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9
      ├── key: (1)
      ├── fd: (1)-->(2,3)
      ├── scan abc
      │    ├── columns: a:1(int!null) b:2(int) c:3(int)
      │    ├── stats: [rows=20, distinct(1)=20, null(1)=0, distinct(2)=10, null(2)=0]
      │    │   histogram(1)=  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1   0  1
      │    │                <--- 0 --- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9 --- 10 --- 11 --- 12 --- 13 --- 14 --- 15 --- 16 --- 17 --- 18 --- 19
      │    │   histogram(2)=  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2  0  2
      │    │                <--- 0 --- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9
      │    ├── key: (1)
      │    └── fd: (1)-->(2,3)
      └── filters
           └── b:2 IS DISTINCT FROM CAST(NULL AS INT8) [type=bool, outer=(2), constraints=(/2: (/NULL - ]; tight)]
