# LogicTest: local

# Tests that verify we can create and use partial table statistics
# USING EXTREMES correctly.

# Verify that we can use partial statistics for a simple
# table that has a small number of values added to the end
# of the latest full statistic.
# We store multiple full stats to ensure that merged stats
# work with forecasted statistics as well.

# Capture the current value of the forecasts cluster setting so that it can be
# restored later in this file.
let $forecastsEnabledPrev
SHOW CLUSTER SETTING sql.stats.forecasts.enabled

# First: enable statistics forecasts.
statement ok
SET CLUSTER SETTING sql.stats.forecasts.enabled = true

statement ok
CREATE TABLE g (b INT PRIMARY KEY)

# Inject two full statistics (ids 1 and 2) and one partial statistic (id 3).
# The partial statistic references full statistic 2 through full_statistic_id
# and holds the values 9 through 11, which lie above that statistic's largest
# upper bound of 8.
statement ok
ALTER TABLE g INJECT STATISTICS '[
  {
    "id": 1,
    "avg_size": 1,
    "columns": [
      "b"
    ],
    "created_at": "1988-08-05 00:00:00.000000",
    "distinct_count": 3,
    "histo_buckets": [
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "0"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "1"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "2"
      }
    ],
    "histo_col_type": "INT8",
    "histo_version": 2,
    "name": "__auto__",
    "null_count": 0,
    "row_count": 3
  },
  {
    "id": 2,
    "avg_size": 1,
    "columns": [
      "b"
    ],
    "created_at": "1988-08-07 00:00:00.000000",
    "distinct_count": 9,
    "histo_buckets": [
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "0"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "1"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "2"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "3"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "4"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "5"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "6"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "7"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "8"
      }
    ],
    "histo_col_type": "INT8",
    "histo_version": 2,
    "name": "__auto__",
    "null_count": 0,
    "row_count": 9
  },
    {
      "id": 3,
      "avg_size": 2,
      "columns": [
        "b"
      ],
      "created_at": "1988-08-08 00:00:00.000000",
      "distinct_count": 3,
      "histo_buckets": [
        {
          "distinct_range": 0,
          "num_eq": 1,
          "num_range": 0,
          "upper_bound": "9"
        },
        {
          "distinct_range": 0,
          "num_eq": 1,
          "num_range": 0,
          "upper_bound": "10"
        },
        {
          "distinct_range": 0,
          "num_eq": 1,
          "num_range": 0,
          "upper_bound": "11"
        }
      ],
      "histo_col_type": "INT8",
      "histo_version": 2,
      "name": "partial",
      "null_count": 0,
      "partial_predicate": "(b < 0:::INT8) OR ((b > 8:::INT8) OR (b IS NULL))",
      "row_count": 3,
      "full_statistic_id": 2
    }
]'

# List all statistics for g, including the merged one. The merged statistic
# carries the same created timestamp as the partial statistic it was built
# from, so ordering by created alone leaves that pair's order unspecified.
# The extra sort keys place __merged__ after the other rows that share its
# timestamp, which keeps the expected row order deterministic.
query IITTTIIII colnames
SELECT histogram_id, full_histogram_id, statistics_name, column_names, created, row_count, distinct_count, null_count, avg_size
FROM [SHOW STATISTICS FOR TABLE g WITH MERGE]
ORDER BY created, (statistics_name = '__merged__'), statistics_name
----
histogram_id  full_histogram_id  statistics_name  column_names  created                        row_count  distinct_count  null_count  avg_size
1             NULL               __auto__         {b}           1988-08-05 00:00:00 +0000 UTC  3          3               0           1
2             NULL               __auto__         {b}           1988-08-07 00:00:00 +0000 UTC  9          9               0           1
3             2                  partial          {b}           1988-08-08 00:00:00 +0000 UTC  3          3               0           2
0             NULL               __merged__       {b}           1988-08-08 00:00:00 +0000 UTC  12         12              0           1

# Show the merged statistic as JSON. Its histogram holds the buckets of full
# statistic 2 (upper bounds 0 through 8) followed by the partial statistic's
# buckets (upper bounds 9 through 11), and its row and distinct counts are 12.
query T
SELECT jsonb_pretty(stat)
FROM (
SELECT jsonb_array_elements(statistics) AS stat FROM [SHOW STATISTICS USING JSON FOR TABLE g WITH MERGE]
)
WHERE stat->>'name' = '__merged__';
----
{
    "avg_size": 1,
    "columns": [
        "b"
    ],
    "created_at": "1988-08-08 00:00:00",
    "distinct_count": 12,
    "histo_buckets": [
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "0"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "1"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "2"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "3"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "4"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "5"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "6"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "7"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "8"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "9"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "10"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "11"
        }
    ],
    "histo_col_type": "INT8",
    "histo_version": 3,
    "name": "__merged__",
    "null_count": 0,
    "row_count": 12
}

# The new merged statistic raises the number of full statistics above the
# minimum threshold required to create forecasts, so the optimizer uses a
# forecasted statistic here that incorporates the merged statistic. The
# histogram in the plan therefore extends beyond the merged upper bound of 11.
query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM g WHERE b >= 10
----
scan g
 ├── columns: b:1
 ├── constraint: /1: [/10 - ]
 ├── stats: [rows=7, distinct(1)=6, null(1)=0]
 │   histogram(1)=  0  2   0  1   0  1   0  1   0  1   0  1
 │                <--- 11 --- 12 --- 13 --- 14 --- 15 --- 17
 ├── cost: 25.09
 ├── key: (1)
 └── distribution: test

# Since merged stats can be used for forecasting, verify
# that merging and forecasting can work together.
# The partial and merged statistics share a created timestamp, so the ORDER BY
# breaks that tie explicitly (merged last) to keep the row order deterministic.

query TTTIIII
SELECT statistics_name, column_names, created, row_count, distinct_count, null_count, avg_size
FROM [SHOW STATISTICS FOR TABLE g WITH MERGE, FORECAST]
ORDER BY created, (statistics_name = '__merged__'), statistics_name
----
__auto__      {b}  1988-08-05 00:00:00 +0000 UTC  3   3   0  1
__auto__      {b}  1988-08-07 00:00:00 +0000 UTC  9   9   0  1
partial       {b}  1988-08-08 00:00:00 +0000 UTC  3   3   0  2
__merged__    {b}  1988-08-08 00:00:00 +0000 UTC  12  12  0  1
__forecast__  {b}  1988-08-10 00:00:00 +0000 UTC  18  18  0  1

# Show the forecasted statistic as JSON. Its histogram runs past the merged
# statistic's largest upper bound (11) up to 17, and its row and distinct
# counts are 18.
query T
SELECT jsonb_pretty(stat)
FROM (
SELECT jsonb_array_elements(statistics) AS stat FROM [SHOW STATISTICS USING JSON FOR TABLE g WITH MERGE, FORECAST]
)
WHERE stat->>'name' = '__forecast__';
----
{
    "avg_size": 1,
    "columns": [
        "b"
    ],
    "created_at": "1988-08-10 00:00:00",
    "distinct_count": 18,
    "histo_buckets": [
        {
            "distinct_range": 0,
            "num_eq": 2,
            "num_range": 0,
            "upper_bound": "0"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "1"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "2"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "3"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "4"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "5"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "6"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "7"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "8"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "9"
        },
        {
            "distinct_range": 0,
            "num_eq": 0,
            "num_range": 0,
            "upper_bound": "10"
        },
        {
            "distinct_range": 0,
            "num_eq": 2,
            "num_range": 0,
            "upper_bound": "11"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "12"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "13"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "14"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "15"
        },
        {
            "distinct_range": 0,
            "num_eq": 0,
            "num_range": 0,
            "upper_bound": "16"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "17"
        }
    ],
    "histo_col_type": "INT8",
    "histo_version": 3,
    "name": "__forecast__",
    "null_count": 0,
    "row_count": 18
}

# Verify that when the partial histogram is empty
# the returned statistic is the latest full statistic
# renamed to __merged__ with the created_at time
# updated to that of the partial statistic.

statement ok
CREATE TABLE h (h INT PRIMARY KEY)

# Inject one full statistic (id 1) and a partial statistic that references it
# but has no histogram buckets and zero row, distinct, and null counts.
statement ok
ALTER TABLE h INJECT STATISTICS '
 [
      {
          "id": 1,
          "avg_size": 1,
          "columns": [
              "h"
          ],
          "created_at": "2022-12-07 17:43:58.649416",
          "distinct_count": 4,
          "histo_buckets": [
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "10"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "20"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "30"
              },
              {
                  "distinct_range": 0,
                  "num_eq": 1,
                  "num_range": 0,
                  "upper_bound": "40"
              }
          ],
          "histo_col_type": "INT8",
          "histo_version": 2,
          "name": "full",
          "null_count": 0,
          "row_count": 4
      },
      {
           "avg_size": 0,
           "columns": [
               "h"
           ],
           "created_at": "2022-12-07 17:46:22.436487",
           "distinct_count": 0,
           "histo_col_type": "INT8",
           "histo_version": 2,
           "name": "partial",
           "null_count": 0,
           "partial_predicate": "(h < 10:::INT8) OR ((h > 40:::INT8) OR (h IS NULL))",
           "row_count": 0,
           "full_statistic_id": 1
       }
 ]'

# The merged statistic takes its created timestamp from the partial statistic,
# so the two rows tie on created. The extra sort keys place __merged__ after
# the partial row to keep the expected row order deterministic.
query TTTIIII
SELECT statistics_name, column_names, created, row_count, distinct_count, null_count, avg_size
FROM [SHOW STATISTICS FOR TABLE h WITH MERGE]
ORDER BY created, (statistics_name = '__merged__'), statistics_name
----
full        {h}  2022-12-07 17:43:58.649416 +0000 UTC  4  4  0  1
partial     {h}  2022-12-07 17:46:22.436487 +0000 UTC  0  0  0  0
__merged__  {h}  2022-12-07 17:46:22.436487 +0000 UTC  4  4  0  1

# Show the merged statistic as JSON. It keeps the full statistic's id,
# histogram version, counts, and buckets; its name is __merged__ and its
# created_at matches the partial statistic.
query T
SELECT jsonb_pretty(stat)
FROM (
SELECT jsonb_array_elements(statistics) AS stat FROM [SHOW STATISTICS USING JSON FOR TABLE h WITH MERGE]
)
WHERE stat->>'name' = '__merged__';
----
{
    "avg_size": 1,
    "columns": [
        "h"
    ],
    "created_at": "2022-12-07 17:46:22.436487",
    "distinct_count": 4,
    "histo_buckets": [
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "10"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "20"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "30"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "40"
        }
    ],
    "histo_col_type": "INT8",
    "histo_version": 2,
    "id": 1,
    "name": "__merged__",
    "null_count": 0,
    "row_count": 4
}

# Verify partial statistics collection when we add
# values to the start and end of our column.

statement ok
CREATE TABLE i (i INT PRIMARY KEY)

# Inject one full statistic (id 1, values 0 through 2) and a partial statistic
# that references it with buckets both below (-3 through -1) and above
# (3 through 5) the full statistic's range.
# NOTE(review): the injected partial_predicate uses "i > 3", while the full
# statistic's largest upper bound is 2 and the partial histogram has a bucket
# at 3 -- confirm whether this mismatch is intentional.
statement ok
ALTER TABLE i INJECT STATISTICS '[
  {
    "id": 1,
    "avg_size": 1,
    "columns": [
      "i"
    ],
    "created_at": "1988-08-05 00:00:00.000000",
    "distinct_count": 3,
    "histo_buckets": [
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "0"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "1"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "2"
      }
    ],
    "histo_col_type": "INT8",
    "histo_version": 2,
    "name": "full_0",
    "null_count": 0,
    "row_count": 3
  },
  {
    "avg_size": 2,
    "columns": [
      "i"
    ],
    "created_at": "2022-12-07 19:57:07.781368",
    "distinct_count": 6,
    "histo_buckets": [
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "-3"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "-2"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "-1"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "3"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "4"
      },
      {
        "distinct_range": 0,
        "num_eq": 1,
        "num_range": 0,
        "upper_bound": "5"
      }
    ],
    "histo_col_type": "INT8",
    "histo_version": 2,
    "name": "sp",
    "null_count": 0,
    "partial_predicate": "(i < 0:::INT8) OR ((i > 3:::INT8) OR (i IS NULL))",
    "row_count": 6,
    "full_statistic_id": 1
  }
]'

# The partial statistic sp and the merged statistic share a created timestamp,
# so ordering by created alone leaves their relative order unspecified. The
# extra sort keys place __merged__ last among rows with the same timestamp.
query TTTIIII
SELECT statistics_name, column_names, created, row_count, distinct_count, null_count, avg_size
FROM [SHOW STATISTICS FOR TABLE i WITH MERGE]
ORDER BY created, (statistics_name = '__merged__'), statistics_name
----
full_0      {i}  1988-08-05 00:00:00 +0000 UTC         3  3  0  1
sp          {i}  2022-12-07 19:57:07.781368 +0000 UTC  6  6  0  2
__merged__  {i}  2022-12-07 19:57:07.781368 +0000 UTC  9  9  0  1

# Show the merged statistic as JSON. The partial buckets appear on both sides
# of the full statistic's buckets: -3 through -1 before them and 3 through 5
# after them, for 9 buckets in total.
query T
SELECT jsonb_pretty(stat)
FROM (
SELECT jsonb_array_elements(statistics) AS stat FROM [SHOW STATISTICS USING JSON FOR TABLE i WITH MERGE]
)
WHERE stat->>'name' = '__merged__';
----
{
    "avg_size": 1,
    "columns": [
        "i"
    ],
    "created_at": "2022-12-07 19:57:07.781368",
    "distinct_count": 9,
    "histo_buckets": [
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "-3"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "-2"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "-1"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "0"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "1"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "2"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "3"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "4"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "5"
        }
    ],
    "histo_col_type": "INT8",
    "histo_version": 3,
    "name": "__merged__",
    "null_count": 0,
    "row_count": 9
}

# Turn on use of merged partial statistics for this session. The setting stays
# on until it is reset further down in this file.
statement ok
SET optimizer_use_merged_partial_statistics = on

# The filter selects values at or below -1 and the value 6. The plan's
# histogram shows the buckets -3 through -1, which only exist in the partial
# statistic, and an estimate of 3 rows.
query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM i WHERE i = 6 OR i <= -1
----
scan i
 ├── columns: i:1
 ├── constraint: /1
 │    ├── [ - /-1]
 │    └── [/6 - /6]
 ├── stats: [rows=3, distinct(1)=3, null(1)=0]
 │   histogram(1)=  0  1   0  1   0  1   0  0
 │                <--- -3 --- -2 --- -1 --- 5
 ├── cost: 25.05
 ├── key: (1)
 └── distribution: test

# Verify that we can merge and use partial statistics
# for columns with non-zero NumRange and DistinctRange
# buckets, and a non-zero null count, both when the
# bucket sizes of each side are the same and when
# they are different.

statement ok
CREATE TABLE ab (a INT, b INT, INDEX (a, b));

# Inject a full and a partial statistic for each column; every partial
# statistic points at its full statistic through full_statistic_id. The
# partial histogram for b uses the same bucket spacing as its full histogram,
# while the partial histogram for a uses a different spacing.
# The partial_predicate of partial_a refers to column a, the column that
# statistic is collected on.
statement ok
ALTER TABLE ab INJECT STATISTICS
'[
    {
       "id":1,
       "avg_size":3,
       "columns":[
          "b"
       ],
       "created_at":"2022-12-12 15:22:21.988179",
       "distinct_count":26,
       "histo_buckets":[
          {
             "distinct_range":0,
             "num_eq":1,
             "num_range":0,
             "upper_bound":"11"
          },
          {
             "distinct_range":3,
             "num_eq":1,
             "num_range":3,
             "upper_bound":"15"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"20"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"25"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"30"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"35"
          }
       ],
       "histo_col_type":"INT8",
       "histo_version":2,
       "name":"full_b",
       "null_count":3,
       "row_count":28
    },
    {
       "avg_size":3,
       "columns":[
          "b"
       ],
       "created_at":"2022-12-13 15:22:21.988179",
       "distinct_count":32,
       "histo_buckets":[
          {
             "distinct_range":0,
             "num_eq":1,
             "num_range":0,
             "upper_bound":"0"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"5"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"10"
          },
          {
             "distinct_range":0,
             "num_eq":1,
             "num_range":0,
             "upper_bound":"36"
          },
          {
             "distinct_range":3,
             "num_eq":1,
             "num_range":3,
             "upper_bound":"40"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"45"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"50"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"55"
          }
       ],
       "histo_col_type":"INT8",
       "histo_version":2,
       "name":"partial_b",
       "null_count":5,
       "row_count":36,
       "partial_predicate": "(b < 15:::INT8) OR ((b > 35:::INT8) OR (b IS NULL))",
       "full_statistic_id": 1
    },
    {
       "id":2,
       "avg_size":3,
       "columns":[
          "a"
       ],
       "created_at":"2022-12-12 15:22:21.988179",
       "distinct_count":26,
       "histo_buckets":[
          {
             "distinct_range":0,
             "num_eq":1,
             "num_range":0,
             "upper_bound":"11"
          },
          {
             "distinct_range":3,
             "num_eq":1,
             "num_range":3,
             "upper_bound":"15"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"20"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"25"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"30"
          },
          {
             "distinct_range":4,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"35"
          }
       ],
       "histo_col_type":"INT8",
       "histo_version":2,
       "name":"full_a",
       "null_count":3,
       "row_count":28
    },
    {
       "avg_size":3,
       "columns":[
          "a"
       ],
       "created_at":"2022-12-14 15:22:21.988179",
       "distinct_count":28,
       "histo_buckets":[
          {
             "distinct_range":0,
             "num_eq":1,
             "num_range":0,
             "upper_bound":"0"
          },
          {
             "distinct_range":9,
             "num_eq":1,
             "num_range":9,
             "upper_bound":"10"
          },
          {
             "distinct_range":0,
             "num_eq":1,
             "num_range":0,
             "upper_bound":"36"
          },
          {
             "distinct_range":2,
             "num_eq":1,
             "num_range":3,
             "upper_bound":"40"
          },
          {
             "distinct_range":3,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"44"
          },
          {
             "distinct_range":3,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"48"
          },
          {
             "distinct_range":3,
             "num_eq":1,
             "num_range":4,
             "upper_bound":"52"
          }
       ],
       "histo_col_type":"INT8",
       "histo_version":2,
       "name":"partial_a",
       "null_count":5,
       "row_count":36,
       "partial_predicate": "(a < 15:::INT8) OR ((a > 35:::INT8) OR (a IS NULL))",
       "full_statistic_id": 2
    }
 ]'

# Several rows tie on created here: full_a with full_b, and each partial
# statistic with the merged statistic built from it. The extra sort keys
# resolve those ties (merged rows last, then by name) so that the expected
# row order does not depend on unspecified tie ordering.
query TTTIIII colnames
SELECT statistics_name, column_names, created, row_count, distinct_count, null_count, avg_size
FROM [SHOW STATISTICS FOR TABLE ab WITH MERGE]
ORDER BY created, (statistics_name = '__merged__'), statistics_name
----
statistics_name  column_names  created                               row_count  distinct_count  null_count  avg_size
full_a           {a}           2022-12-12 15:22:21.988179 +0000 UTC  28         26              3           3
full_b           {b}           2022-12-12 15:22:21.988179 +0000 UTC  28         26              3           3
partial_b        {b}           2022-12-13 15:22:21.988179 +0000 UTC  36         32              5           3
__merged__       {b}           2022-12-13 15:22:21.988179 +0000 UTC  61         57              5           3
partial_a        {a}           2022-12-14 15:22:21.988179 +0000 UTC  36         28              5           3
__merged__       {a}           2022-12-14 15:22:21.988179 +0000 UTC  61         53              5           3

# Show both merged statistics as JSON, ordered by created_at so that the one
# for column b comes before the one for column a. In each, the partial buckets
# surround the full statistic's buckets (11 through 35), and the null count is
# 5.
query T
SELECT jsonb_pretty(stat)
FROM (
SELECT jsonb_array_elements(statistics) AS stat FROM [SHOW STATISTICS USING JSON FOR TABLE ab WITH MERGE]
)
WHERE stat->>'name' = '__merged__' ORDER BY stat->>'created_at';
----
{
    "avg_size": 3,
    "columns": [
        "b"
    ],
    "created_at": "2022-12-13 15:22:21.988179",
    "distinct_count": 57,
    "histo_buckets": [
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "0"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "5"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "10"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "11"
        },
        {
            "distinct_range": 3,
            "num_eq": 1,
            "num_range": 3,
            "upper_bound": "15"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "20"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "25"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "30"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "35"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "36"
        },
        {
            "distinct_range": 3,
            "num_eq": 1,
            "num_range": 3,
            "upper_bound": "40"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "45"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "50"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "55"
        }
    ],
    "histo_col_type": "INT8",
    "histo_version": 3,
    "name": "__merged__",
    "null_count": 5,
    "row_count": 61
}
{
    "avg_size": 3,
    "columns": [
        "a"
    ],
    "created_at": "2022-12-14 15:22:21.988179",
    "distinct_count": 53,
    "histo_buckets": [
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "0"
        },
        {
            "distinct_range": 9,
            "num_eq": 1,
            "num_range": 9,
            "upper_bound": "10"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "11"
        },
        {
            "distinct_range": 3,
            "num_eq": 1,
            "num_range": 3,
            "upper_bound": "15"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "20"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "25"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "30"
        },
        {
            "distinct_range": 4,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "35"
        },
        {
            "distinct_range": 0,
            "num_eq": 1,
            "num_range": 0,
            "upper_bound": "36"
        },
        {
            "distinct_range": 2,
            "num_eq": 1,
            "num_range": 3,
            "upper_bound": "40"
        },
        {
            "distinct_range": 3,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "44"
        },
        {
            "distinct_range": 3,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "48"
        },
        {
            "distinct_range": 3,
            "num_eq": 1,
            "num_range": 4,
            "upper_bound": "52"
        }
    ],
    "histo_col_type": "INT8",
    "histo_version": 3,
    "name": "__merged__",
    "null_count": 5,
    "row_count": 61
}

# Verify that dropping a column removes it from the SHOW STATISTICS output.
statement ok
ALTER TABLE ab DROP COLUMN a

# Only the statistics on column b remain. Both queries below use the same
# ordering: by created, with __merged__ placed after other rows that share its
# timestamp, so the expected row order is deterministic.
query TTTIIII colnames
SELECT statistics_name, column_names, created, row_count, distinct_count, null_count, avg_size
FROM [SHOW STATISTICS FOR TABLE ab]
ORDER BY created, (statistics_name = '__merged__'), statistics_name
----
statistics_name  column_names  created                               row_count  distinct_count  null_count  avg_size
full_b           {b}           2022-12-12 15:22:21.988179 +0000 UTC  28         26              3           3
partial_b        {b}           2022-12-13 15:22:21.988179 +0000 UTC  36         32              5           3

query TTTIIII colnames
SELECT statistics_name, column_names, created, row_count, distinct_count, null_count, avg_size
FROM [SHOW STATISTICS FOR TABLE ab WITH MERGE]
ORDER BY created, (statistics_name = '__merged__'), statistics_name
----
statistics_name  column_names  created                               row_count  distinct_count  null_count  avg_size
full_b           {b}           2022-12-12 15:22:21.988179 +0000 UTC  28         26              3           3
partial_b        {b}           2022-12-13 15:22:21.988179 +0000 UTC  36         32              5           3
__merged__       {b}           2022-12-13 15:22:21.988179 +0000 UTC  61         57              5           3

# Restore the forecasts setting to the value captured at the top of this file.
# The tests that follow run with that restored value.
statement ok
SET CLUSTER SETTING sql.stats.forecasts.enabled = $forecastsEnabledPrev

# Check that partial stats can be merged with full stats whose histogram
# contains outer buckets.
statement ok
CREATE TABLE ka (k INT PRIMARY KEY, a INT, INDEX (a))

statement ok
INSERT INTO ka (k, a) SELECT x, x FROM generate_series(0, 9998) AS g(x)

statement ok
INSERT INTO ka (k, a) VALUES (9999, NULL)

statement ok
CREATE STATISTICS ka_fullstat ON a FROM ka

# Clear the stat cache so that creating partial statistics has access to the
# latest full statistic.
statement ok
SELECT crdb_internal.clear_table_stats_cache();

# Look up the histogram id of the full statistic for the SHOW HISTOGRAM query
# below.
let $hist_id_ka_outer_buckets_full
SELECT histogram_id FROM [SHOW STATISTICS FOR TABLE ka] WHERE statistics_name = 'ka_fullstat'

# The full stats collection should have added 2 outer buckets (with upper
# bounds of MinInt64 and MaxInt64), for a total of 202 buckets.
query I
SELECT count(*) FROM [SHOW HISTOGRAM $hist_id_ka_outer_buckets_full]
----
202

# Add two rows whose a values (9999 and 10000) lie above the largest value
# present when the full statistic was collected (9998), then collect a partial
# statistic at the extremes.
statement ok
INSERT INTO ka VALUES (10000, 9999), (10001, 10000)

statement ok
CREATE STATISTICS ka_partialstat ON a FROM ka USING EXTREMES

# Statistic names are unique here, so ordering by name is deterministic.
query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count
FROM [SHOW STATISTICS FOR TABLE ka WITH MERGE]
ORDER BY statistics_name
----
statistics_name  column_names  row_count  distinct_count  null_count
__merged__       {a}           10002      9922            1
ka_fullstat      {a}           10000      9920            1
ka_partialstat   {a}           3          3               1

# Verify that the merged histogram correctly appends the partial stat buckets.
# Only the three buckets with the largest upper bounds are shown: the two new
# values (10000 and 9999) followed by the full statistic's last regular bucket
# (9998).
query T
SELECT jsonb_pretty(bucket)
FROM (
SELECT jsonb_array_elements(stat->'histo_buckets') AS bucket
FROM (
SELECT jsonb_array_elements(statistics) AS stat FROM [SHOW STATISTICS USING JSON FOR TABLE ka WITH MERGE]
)
WHERE stat->>'name' = '__merged__'
)
ORDER BY (bucket->>'upper_bound')::INT DESC
LIMIT 3
----
{
    "distinct_range": 0,
    "num_eq": 1,
    "num_range": 0,
    "upper_bound": "10000"
}
{
    "distinct_range": 0,
    "num_eq": 1,
    "num_range": 0,
    "upper_bound": "9999"
}
{
    "distinct_range": 49.591730440470876,
    "num_eq": 1,
    "num_range": 50,
    "upper_bound": "9998"
}

# Verify that distinct counts and null counts are merged correctly.
# The histogram sample count is raised to 10005 for the statistics collected
# in this section and reset at the end of it.
statement ok
SET CLUSTER SETTING sql.stats.histogram_samples.count = 10005

statement ok
INSERT INTO ka VALUES (10002, 10001)

statement ok
CREATE STATISTICS ka_fullstat ON a FROM ka

# Clear the stat cache so that creating partial statistics has access to the
# latest full statistic.
statement ok
SELECT crdb_internal.clear_table_stats_cache();

# Add one new value above the previous maximum and one more NULL, then collect
# a partial statistic at the extremes.
statement ok
INSERT INTO ka VALUES (10003, 10002), (10004, NULL)

statement ok
CREATE STATISTICS ka_partialstat ON a FROM ka USING EXTREMES

# The merged statistic reports a null count of 2 and a distinct count one
# higher than the full statistic.
query TTIII colnames
SELECT statistics_name, column_names, row_count, distinct_count, null_count
FROM [SHOW STATISTICS FOR TABLE ka WITH MERGE]
ORDER BY statistics_name
----
statistics_name  column_names  row_count  distinct_count  null_count
__merged__       {a}           10005      9924            2
ka_fullstat      {a}           10003      9923            1
ka_partialstat   {a}           3          2               2

statement ok
RESET CLUSTER SETTING sql.stats.histogram_samples.count

# Verify that optimizer_use_merged_partial_statistics can be used to enable and
# disable merged stat usage in the optimizer.
statement ok
RESET optimizer_use_merged_partial_statistics

# Deliberately remove every row so the table can be repopulated with a small,
# known data set.
statement ok
DELETE FROM ka

statement ok
INSERT INTO ka SELECT x, x FROM generate_series(0, 9) as g(x)

statement ok
CREATE STATISTICS ka_fullstat ON a FROM ka

# Clear the stat cache so that creating partial statistics has access to the
# latest full statistic.
statement ok
SELECT crdb_internal.clear_table_stats_cache();

# Add a single row above the full statistic's range and collect a partial
# statistic that covers it.
statement ok
INSERT INTO ka VALUES (10, 10)

statement ok
CREATE STATISTICS ka_partialstat ON a FROM ka USING EXTREMES

# With the session setting reset, the estimates below do not account for the
# row with a = 10: that lookup is estimated at 0 rows. The first query is
# retried until its output matches.
query T retry
EXPLAIN SELECT * FROM ka WHERE a > 5
----
distribution: local
vectorized: true
·
• scan
  estimated row count: 4 (36% of the table; stats collected <hidden> ago)
  table: ka@ka_a_idx
  spans: [/6 - ]

query T
EXPLAIN SELECT * FROM ka WHERE a = 10
----
distribution: local
vectorized: true
·
• scan
  estimated row count: 0 (<0.01% of the table; stats collected <hidden> ago)
  table: ka@ka_a_idx
  spans: [/10 - /10]

# With the session setting turned on, the same two queries are estimated one
# row higher each: 5 rows for a > 5 and 1 row for a = 10.
statement ok
SET optimizer_use_merged_partial_statistics = on

query T
EXPLAIN SELECT * FROM ka WHERE a > 5
----
distribution: local
vectorized: true
·
• scan
  estimated row count: 5 (45% of the table; stats collected <hidden> ago)
  table: ka@ka_a_idx
  spans: [/6 - ]

query T
EXPLAIN SELECT * FROM ka WHERE a = 10
----
distribution: local
vectorized: true
·
• scan
  estimated row count: 1 (9.1% of the table; stats collected <hidden> ago)
  table: ka@ka_a_idx
  spans: [/10 - /10]
