# LogicTest: local

# Regression test for #98400: the key-encoding of collated strings was
# incorrectly being used as the upper bounds of histogram buckets.
statement ok
CREATE TABLE t98400 (
  k INT PRIMARY KEY,
  s STRING COLLATE en_US_u_ks_level2,
  FAMILY (k, s)
);

# Inject hand-written statistics for both columns of t98400 (row_count=13).
#   - k: an INT8 histogram with 13 buckets, upper bounds 1 through 13, each
#     with num_eq=1 and num_range=0.
#   - s: a collated-string histogram with two buckets whose upper bounds are
#     written as collated string literals: 'foo' (num_eq=1, num_range=0) and
#     'world' (num_eq=2, num_range=10, distinct_range=1).
# The single quotes around the s upper bounds are doubled because the whole
# JSON document is itself a SQL string literal.
statement ok
ALTER TABLE t98400 INJECT STATISTICS '[
    {
        "avg_size": 1,
        "columns": [
            "k"
        ],
        "created_at": "2024-01-19 01:29:41.830253",
        "distinct_count": 13,
        "histo_buckets": [
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "1"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "2"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "3"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "4"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "5"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "6"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "7"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "8"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "9"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "10"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "11"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "12"
            },
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "13"
            }
        ],
        "histo_col_type": "INT8",
        "histo_version": 3,
        "null_count": 0,
        "row_count": 13
    },
    {
        "avg_size": 7,
        "columns": [
            "s"
        ],
        "created_at": "2024-01-19 01:29:41.830253",
        "distinct_count": 3,
        "histo_buckets": [
            {
                "distinct_range": 0,
                "num_eq": 1,
                "num_range": 0,
                "upper_bound": "''foo'' COLLATE en_US_u_ks_level2"
            },
            {
                "distinct_range": 1,
                "num_eq": 2,
                "num_range": 10,
                "upper_bound": "''world'' COLLATE en_US_u_ks_level2"
            }
        ],
        "histo_col_type": "STRING COLLATE en_US_u_ks_level2",
        "histo_version": 3,
        "null_count": 0,
        "row_count": 13
    }
]'

# We expect that the filter is estimated to match 10 rows: 'hello' sorts
# between the injected histogram upper bounds 'foo' and 'world', and the
# 'world' bucket was injected with num_range=10 and distinct_range=1, so an
# equality match inside that range is estimated at all 10 of its rows. The
# filtered histogram(2) under the select should show those 10 rows at 'hello'.
query T
EXPLAIN (OPT, VERBOSE) SELECT * FROM t98400 WHERE s = 'hello' COLLATE en_US_u_ks_level2;
----
select
 ├── columns: k:1 s:2
 ├── stats: [rows=10, distinct(2)=1, null(2)=0]
 │   histogram(2)=  0                 10
 │                <--- 'hello' COLLATE en_US_u_ks_level2
 ├── cost: 43.02
 ├── key: (1)
 ├── fd: ()-->(2)
 ├── distribution: test
 ├── prune: (1)
 ├── scan t98400
 │    ├── columns: k:1 s:2
 │    ├── stats: [rows=13, distinct(1)=13, null(1)=0, distinct(2)=3, null(2)=0]
 │    │   histogram(1)=  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1  0  1   0  1   0  1   0  1
 │    │                <--- 1 --- 2 --- 3 --- 4 --- 5 --- 6 --- 7 --- 8 --- 9 --- 10 --- 11 --- 12 --- 13
 │    │   histogram(2)=  0                 1                 10                  2
 │    │                <--- 'foo' COLLATE en_US_u_ks_level2 ---- 'world' COLLATE en_US_u_ks_level2
 │    ├── cost: 42.86
 │    ├── key: (1)
 │    ├── fd: (1)-->(2)
 │    ├── distribution: test
 │    └── prune: (1,2)
 └── filters
      └── s:2 = 'hello' COLLATE en_US_u_ks_level2 [outer=(2), constraints=(/2: [/'hello' COLLATE en_US_u_ks_level2 - /'hello' COLLATE en_US_u_ks_level2]; tight), fd=()-->(2)]
