# LogicTest: local

# Create table uv with two INT columns and one secondary index per column,
# each storing the other column, so either index contains both u and v.
# Load 8 rows: u takes 2 distinct values (1, 2) and v takes 7 (1 through 7).
statement ok
CREATE TABLE uv (u INT, v INT, INDEX (u) STORING (v), INDEX (v) STORING (u));
INSERT INTO uv VALUES (1, 1), (1, 2), (1, 3), (1, 4), (2, 4), (2, 5), (2, 6), (2, 7)

# Collect single-column statistics named u (on column u) and v (on column v).
statement ok
CREATE STATISTICS u ON u FROM uv;
CREATE STATISTICS v ON v FROM uv

# Check that both statistics report the row, distinct, and null counts implied
# by the 8 inserted rows, and that each one has a histogram attached.
query TTIIIB colnames,rowsort
SELECT
  statistics_name,
  column_names,
  row_count,
  distinct_count,
  null_count,
  histogram_id IS NOT NULL AS has_histogram
FROM
  [SHOW STATISTICS FOR TABLE uv]
----
statistics_name  column_names  row_count  distinct_count  null_count  has_histogram
u                {u}           8          2               0           true
v                {v}           8          7               0           true

# Disable zigzag joins. The expected plans for uv below all scan a single
# secondary index and apply the remaining predicate as a filter.
statement ok
set enable_zigzag_join = false

# Verify we scan index v which has the more selective constraint.
# (With the collected stats, v has 7 distinct values over 8 rows while u has
# only 2.) NOTE(review): the retry option is presumably needed because the
# freshly collected stats may not be visible to the optimizer right away;
# confirm.
query T retry
EXPLAIN (VERBOSE) SELECT * FROM uv WHERE u = 1 AND v = 1
----
distribution: local
vectorized: true
·
• filter
│ columns: (u, v)
│ estimated row count: 1
│ filter: u = 1
│
└── • scan
      columns: (u, v)
      estimated row count: 1 (13% of the table; stats collected <hidden> ago)
      table: uv@uv_v_idx
      spans: /1-/2

# Verify that injecting different statistics changes the plan.
# The injected stats claim 100 rows with 100 distinct values of u but only 10
# distinct values of v, so u = 1 becomes the more selective constraint and the
# expected plan switches to the index on u.
statement ok
ALTER TABLE uv INJECT STATISTICS '[
  {
    "columns": ["u"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 100
  },
  {
    "columns": ["v"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 10
  }
]'

# Same query as before; only the statistics have changed.
query T
EXPLAIN (VERBOSE) SELECT * FROM uv WHERE u = 1 AND v = 1
----
distribution: local
vectorized: true
·
• filter
│ columns: (u, v)
│ estimated row count: 0
│ filter: v = 1
│
└── • scan
      columns: (u, v)
      estimated row count: 1 (1.0% of the table; stats collected <hidden> ago)
      table: uv@uv_u_idx
      spans: /1-/2

# Verify that injecting different statistics with null counts
# changes the plan.
# First case: explicit null counts of 0 on both columns. u (20 distinct
# values) is still more selective than v (10 distinct values), so the index
# on u is still expected; the scan estimate is 5 of 100 rows.
statement ok
ALTER TABLE uv INJECT STATISTICS '[
  {
    "columns": ["u"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 20,
    "null_count": 0
  },
  {
    "columns": ["v"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 10,
    "null_count": 0
  }
]'

query T
EXPLAIN (VERBOSE) SELECT * FROM uv WHERE u = 1 AND v = 1
----
distribution: local
vectorized: true
·
• filter
│ columns: (u, v)
│ estimated row count: 1
│ filter: v = 1
│
└── • scan
      columns: (u, v)
      estimated row count: 5 (5.0% of the table; stats collected <hidden> ago)
      table: uv@uv_u_idx
      spans: /1-/2

# Second case: identical stats except that 90 of the 100 rows now have a NULL
# v. That leaves few non-NULL v values, making v = 1 more selective than
# u = 1, so the expected plan switches back to the index on v.
statement ok
ALTER TABLE uv INJECT STATISTICS '[
  {
    "columns": ["u"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 20,
    "null_count": 0
  },
  {
    "columns": ["v"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 10,
    "null_count": 90
  }
]'

query T
EXPLAIN (VERBOSE) SELECT * FROM uv WHERE u = 1 AND v = 1
----
distribution: local
vectorized: true
·
• filter
│ columns: (u, v)
│ estimated row count: 0
│ filter: u = 1
│
└── • scan
      columns: (u, v)
      estimated row count: 1 (1.1% of the table; stats collected <hidden> ago)
      table: uv@uv_v_idx
      spans: /1-/2

# Inject three statistics for the session-setting tests that follow:
#   - u: a four-bucket histogram (upper bounds 1, 2, 10, 20) whose num_eq and
#     num_range values sum to the 100-row row_count;
#   - v: the same stat as the previous case (10 distinct values, 90 NULLs);
#   - (u, v): a multi-column stat with 25 distinct values.
# NOTE(review): histo_col_type is declared as INT4 while column u is declared
# as INT; confirm this mismatch is intentional.
statement ok
ALTER TABLE uv INJECT STATISTICS '[
  {
    "columns": ["u"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 20,
    "null_count": 0,
    "histo_col_type":"INT4",
    "histo_buckets":[{
      "num_eq":50,
      "num_range":0,
      "distinct_range":0,
      "upper_bound":"1"
    },
    {
      "num_eq":20,
      "num_range":0,
      "distinct_range":0,
      "upper_bound":"2"
    },
    {
      "num_eq":5,
      "num_range":8,
      "distinct_range":7,
      "upper_bound":"10"
    },
    {
      "num_eq":5,
      "num_range":12,
      "distinct_range":9,
      "upper_bound":"20"
    }]
  },
  {
    "columns": ["v"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 10,
    "null_count": 90
  },
  {
    "columns": ["u", "v"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 25,
    "null_count": 90
  }
]'

# Test that we respect the session settings for using histograms and
# multi-column stats.
# Step 1: histograms off. The expected scan stats contain no histogram line
# and estimate 33.33333 of the 100 rows for u < 30.
statement ok
set optimizer_use_histograms = false

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM uv WHERE u < 30 GROUP BY u, v
----
distinct-on
 ├── columns: u:1 v:2
 ├── grouping columns: u:1 v:2
 ├── stats: [rows=20.06173, distinct(1,2)=20.0617, null(1,2)=0]
 ├── cost: 55.917284
 ├── key: (1,2)
 ├── distribution: test
 └── scan uv@uv_u_idx
      ├── columns: u:1 v:2
      ├── constraint: /1/3: (/NULL - /29]
      ├── stats: [rows=33.33333, distinct(1)=6.66667, null(1)=0, distinct(1,2)=20.0617, null(1,2)=0]
      ├── cost: 54.6866667
      ├── distribution: test
      ├── prune: (2)
      └── interesting orderings: (+1) (+2)

# Step 2: additionally turn multi-column stats off (histograms are still
# off). The scan row estimate is unchanged, but the expected distinct(1,2)
# estimate rises to 33.3333, equal to the row estimate.
statement ok
set optimizer_use_multicol_stats = false

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM uv WHERE u < 30 GROUP BY u, v
----
distinct-on
 ├── columns: u:1 v:2
 ├── grouping columns: u:1 v:2
 ├── stats: [rows=33.33333, distinct(1,2)=33.3333, null(1,2)=0]
 ├── cost: 56.05
 ├── key: (1,2)
 ├── distribution: test
 └── scan uv@uv_u_idx
      ├── columns: u:1 v:2
      ├── constraint: /1/3: (/NULL - /29]
      ├── stats: [rows=33.33333, distinct(1)=6.66667, null(1)=0, distinct(1,2)=33.3333, null(1,2)=0]
      ├── cost: 54.6866667
      ├── distribution: test
      ├── prune: (2)
      └── interesting orderings: (+1) (+2)

# Step 3: turn histograms back on (multi-column stats are still off). The
# scan now shows the injected histogram on u and estimates all 100 rows,
# since every bucket upper bound (1, 2, 10, 20) is below 30.
statement ok
set optimizer_use_histograms = true

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM uv WHERE u < 30 GROUP BY u, v
----
distinct-on
 ├── columns: u:1 v:2
 ├── grouping columns: u:1 v:2
 ├── stats: [rows=100, distinct(1,2)=100, null(1,2)=0]
 ├── cost: 132.050563
 ├── key: (1,2)
 ├── distribution: test
 └── scan uv@uv_u_idx
      ├── columns: u:1 v:2
      ├── constraint: /1/3: (/NULL - /29]
      ├── stats: [rows=100, distinct(1)=20, null(1)=0, distinct(1,2)=100, null(1,2)=0]
      │   histogram(1)=  0 50  0 20  8  5   12  5
      │                <--- 1 --- 2 --- 10 ---- 20
      ├── cost: 128.02
      ├── distribution: test
      ├── prune: (2)
      └── interesting orderings: (+1) (+2)

# Step 4: turn multi-column stats back on, so both settings are enabled.
# The expected distinct(1,2) estimate now equals the injected (u, v)
# distinct count of 25.
statement ok
set optimizer_use_multicol_stats = true

query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM uv WHERE u < 30 GROUP BY u, v
----
distinct-on
 ├── columns: u:1 v:2
 ├── grouping columns: u:1 v:2
 ├── stats: [rows=25, distinct(1,2)=25, null(1,2)=0]
 ├── cost: 131.3
 ├── key: (1,2)
 ├── distribution: test
 └── scan uv@uv_u_idx
      ├── columns: u:1 v:2
      ├── constraint: /1/3: (/NULL - /29]
      ├── stats: [rows=100, distinct(1)=20, null(1)=0, distinct(1,2)=25, null(1,2)=0]
      │   histogram(1)=  0 50  0 20  8  5   12  5
      │                <--- 1 --- 2 --- 10 ---- 20
      ├── cost: 128.02
      ├── distribution: test
      ├── prune: (2)
      └── interesting orderings: (+1) (+2)

# Verify basic stats for JSON are used.

# Table with a single JSON column.
statement ok
CREATE TABLE tj (j JSON)

# Five rows: one NULL, '1', 'true' twice, and '{}' (4 distinct values).
statement ok
INSERT INTO tj VALUES (NULL), ('1'), ('true'), ('true'), ('{}')

# Baseline before any statistics are collected on tj. The expected scan stats
# (rows=1000, distinct=100, null=10) do not reflect the 5 inserted rows.
# NOTE(review): these are presumably the optimizer's defaults for a table
# without statistics; confirm.
query T
EXPLAIN (OPT, VERBOSE) SELECT DISTINCT j FROM tj WHERE j IS NULL
----
limit
 ├── columns: j:1
 ├── cardinality: [0 - 1]
 ├── immutable
 ├── stats: [rows=1]
 ├── cost: 125.069998
 ├── key: ()
 ├── fd: ()-->(1)
 ├── distribution: test
 ├── select
 │    ├── columns: j:1
 │    ├── immutable
 │    ├── stats: [rows=10, distinct(1)=1, null(1)=10]
 │    ├── cost: 125.049998
 │    ├── fd: ()-->(1)
 │    ├── limit hint: 1.00
 │    ├── distribution: test
 │    ├── scan tj
 │    │    ├── columns: j:1
 │    │    ├── stats: [rows=1000, distinct(1)=100, null(1)=10]
 │    │    ├── cost: 124.019998
 │    │    ├── limit hint: 100.00
 │    │    ├── distribution: test
 │    │    └── prune: (1)
 │    └── filters
 │         └── j:1 IS NULL [outer=(1), immutable, constraints=(/1: [/NULL - /NULL]; tight), fd=()-->(1)]
 └── 1

# Collect statistics named tj on table tj (no column list is given).
statement ok
CREATE STATISTICS tj FROM tj

# Same query after collection. The expected scan stats now match the inserted
# data (rows=5, distinct=4, null=1) and include a histogram on j; the IS NULL
# filter is estimated at the single NULL row.
query T retry
EXPLAIN (OPT, VERBOSE) SELECT DISTINCT j FROM tj WHERE j IS NULL
----
limit
 ├── columns: j:1
 ├── cardinality: [0 - 1]
 ├── immutable
 ├── stats: [rows=1]
 ├── cost: 23.795
 ├── key: ()
 ├── fd: ()-->(1)
 ├── distribution: test
 ├── select
 │    ├── columns: j:1
 │    ├── immutable
 │    ├── stats: [rows=1, distinct(1)=1, null(1)=1]
 │    │   histogram(1)=  0   1
 │    │                <--- NULL
 │    ├── cost: 23.775
 │    ├── fd: ()-->(1)
 │    ├── limit hint: 1.00
 │    ├── distribution: test
 │    ├── scan tj
 │    │    ├── columns: j:1
 │    │    ├── stats: [rows=5, distinct(1)=4, null(1)=1]
 │    │    │   histogram(1)=  0   1    0   1   2   1
 │    │    │                <--- NULL --- '1' --- '{}'
 │    │    ├── cost: 23.695
 │    │    ├── limit hint: 5.00
 │    │    ├── distribution: test
 │    │    └── prune: (1)
 │    └── filters
 │         └── j:1 IS NULL [outer=(1), immutable, constraints=(/1: [/NULL - /NULL]; tight), fd=()-->(1)]
 └── 1

# Ensure we can run ALTER statements on the system.jobs table.
# This is done by adding a user named node to system.users and granting that
# role to root. NOTE(review): presumably membership in node is what permits
# altering system tables; confirm.
statement ok
INSERT INTO system.users VALUES ('node', NULL, true, 3);

statement ok
GRANT node TO root;

# Ensure that stats on the system.jobs table are being used.
# The injected stat on id reports 19 rows and declares a histogram type and
# version but supplies no buckets.
statement ok
ALTER TABLE system.jobs INJECT STATISTICS '[
    {
        "avg_size": 7,
        "columns": [
            "id"
        ],
        "created_at": "2024-02-02 22:56:02.854028",
        "distinct_count": 19,
        "histo_col_type": "INT8",
        "histo_version": 3,
        "null_count": 0,
        "row_count": 19
    }
]';

# The expected scan estimate (rows=19) matches the injected row_count.
query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM system.jobs;
----
scan jobs
 ├── columns: id:1 status:2 created:3 created_by_type:6 created_by_id:7 claim_session_id:8 claim_instance_id:9 num_runs:10 last_run:11 job_type:12 owner:13 description:14 error_msg:15 finished:16
 ├── partial index predicates
 │    └── jobs_run_stats_idx: filters
 │         └── status:2 IN ('cancel-requested', 'pause-requested', 'pending', 'reverting', 'running') [outer=(2), constraints=(/2: [/'cancel-requested' - /'cancel-requested'] [/'pause-requested' - /'pause-requested'] [/'pending' - /'pending'] [/'reverting' - /'reverting'] [/'running' - /'running']; tight)]
 ├── stats: [rows=19]
 ├── cost: 65.29
 ├── key: (1)
 ├── fd: (1)-->(2,3,6-16)
 ├── distribution: test
 └── prune: (1-3,6-16)

# Regression test for #125963. Ensure that we get reasonable stats estimates if
# histograms are empty. (The test is here since the bug in #125963 doesn't
# reproduce with optimizer tests.)

# Table with a UUID primary key, four STRING columns, and a secondary index on
# (a, b, c). Column d is not part of that index.
statement ok
CREATE TABLE tab (
  id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
  a STRING,
  b STRING,
  c STRING,
  d STRING,
  INDEX tab_a_b_c_idx (a ASC, b ASC, c ASC)
);

# Inject stats for a 10,000,000-row table. Each single-column stat declares a
# histogram type and version but supplies no buckets (the empty histograms
# under test). The multi-column stats on (a, b) and (a, b, c) have an empty
# histo_col_type. All null counts are 0.
statement ok
ALTER TABLE tab INJECT STATISTICS '[
    {
        "columns": [
            "id"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 10000000,
        "histo_col_type": "UUID",
        "histo_version": 2,
        "name": "__auto__",
        "null_count": 0,
        "row_count": 10000000
    },
    {
        "columns": [
            "a"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 10000,
        "histo_col_type": "STRING",
        "histo_version": 2,
        "name": "__auto__",
        "null_count": 0,
        "row_count": 10000000
    },
    {
        "columns": [
            "a",
            "b"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 501000,
        "histo_col_type": "",
        "name": "__auto__",
        "null_count": 0,
        "row_count": 10000000
    },
    {
        "columns": [
            "a",
            "b",
            "c"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 1001000,
        "histo_col_type": "",
        "name": "__auto__",
        "null_count": 0,
        "row_count": 10000000
    },
    {
        "columns": [
            "b"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 500000,
        "histo_col_type": "STRING",
        "histo_version": 2,
        "name": "__auto__",
        "null_count": 0,
        "row_count": 10000000
    },
    {
        "columns": [
            "c"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 1000000,
        "histo_col_type": "STRING",
        "histo_version": 2,
        "name": "__auto__",
        "null_count": 0,
        "row_count": 10000000
    },
    {
        "columns": [
            "d"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 1000000,
        "histo_col_type": "STRING",
        "histo_version": 2,
        "name": "__auto__",
        "null_count": 0,
        "row_count": 10000000
    }
]';

# Force the (a, b, c) index and look up a single value for all three indexed
# columns. Selecting d as well requires an index join back to the primary
# index. The expected estimate is a handful of rows (9) out of 10,000,000.
query T
EXPLAIN SELECT
  id,
  a,
  b,
  c,
  d
FROM
  tab@{FORCE_INDEX=tab_a_b_c_idx,NO_FULL_SCAN}
WHERE
  a = 'a' AND b = 'b' AND c = 'c'
----
distribution: local
vectorized: true
·
• index join
│ estimated row count: 9
│ table: tab@tab_pkey
│
└── • scan
      estimated row count: 9 (<0.01% of the table; stats collected <hidden> ago)
      table: tab@tab_a_b_c_idx
      spans: [/'a'/'b'/'c' - /'a'/'b'/'c']

# Same stats as above but with some null values.
# null_count is 10 on every stat except the one on id, which stays at 0.
# Row counts, distinct counts, and histogram metadata are unchanged.
statement ok
ALTER TABLE tab INJECT STATISTICS '[
    {
        "columns": [
            "id"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 10000000,
        "histo_col_type": "UUID",
        "histo_version": 2,
        "name": "__auto__",
        "null_count": 0,
        "row_count": 10000000
    },
    {
        "columns": [
            "a"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 10000,
        "histo_col_type": "STRING",
        "histo_version": 2,
        "name": "__auto__",
        "null_count": 10,
        "row_count": 10000000
    },
    {
        "columns": [
            "a",
            "b"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 501000,
        "histo_col_type": "",
        "name": "__auto__",
        "null_count": 10,
        "row_count": 10000000
    },
    {
        "columns": [
            "a",
            "b",
            "c"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 1001000,
        "histo_col_type": "",
        "name": "__auto__",
        "null_count": 10,
        "row_count": 10000000
    },
    {
        "columns": [
            "b"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 500000,
        "histo_col_type": "STRING",
        "histo_version": 2,
        "name": "__auto__",
        "null_count": 10,
        "row_count": 10000000
    },
    {
        "columns": [
            "c"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 1000000,
        "histo_col_type": "STRING",
        "histo_version": 2,
        "name": "__auto__",
        "null_count": 10,
        "row_count": 10000000
    },
    {
        "columns": [
            "d"
        ],
        "created_at": "2024-01-01 00:00:00",
        "distinct_count": 1000000,
        "histo_col_type": "STRING",
        "histo_version": 2,
        "name": "__auto__",
        "null_count": 10,
        "row_count": 10000000
    }
]';

# Same forced-index lookup as in the zero-null case. The expected plan and
# row estimates are identical to that case.
query T
EXPLAIN SELECT
  id,
  a,
  b,
  c,
  d
FROM
  tab@{FORCE_INDEX=tab_a_b_c_idx,NO_FULL_SCAN}
WHERE
  a = 'a' AND b = 'b' AND c = 'c'
----
distribution: local
vectorized: true
·
• index join
│ estimated row count: 9
│ table: tab@tab_pkey
│
└── • scan
      estimated row count: 9 (<0.01% of the table; stats collected <hidden> ago)
      table: tab@tab_a_b_c_idx
      spans: [/'a'/'b'/'c' - /'a'/'b'/'c']
