# LogicTest: local

# Table a: composite primary key (a, b); column c is INT4 while a and b are INT.
statement ok
CREATE TABLE a (
  a INT,
  b INT,
  c INT4,
  PRIMARY KEY (a, b)
)

# Table c: primary key (a, c) plus a secondary index "sec" on b.
statement ok
CREATE TABLE c (
  a INT,
  b INT,
  c INT,
  d INT,
  PRIMARY KEY (a, c),
  INDEX sec (b)
)

# Table d: primary key (b, a), i.e. keyed on b first.
statement ok
CREATE TABLE d (
  a INT,
  b INT,
  PRIMARY KEY (b, a)
)

# Populate a with 2,001 rows: g ranges over 0..2000, with a = g//2 and
# b = c = g. Target columns are listed explicitly so the fixture does not
# depend on the column order of the table definition.
statement ok
INSERT INTO a (a, b, c) SELECT g//2, g, g FROM generate_series(0,2000) g(g)

# Two rows in c that share b = 1.
statement ok
INSERT INTO c (a, b, c, d) VALUES (1, 1, 1, 0), (2, 1, 2, 0)

# Inject statistics for c that report a single row, even though two rows were
# inserted above; the EXPLAIN ANALYZE outputs further down show
# "estimated row count: 1" for scans of c.
statement ok
ALTER TABLE c INJECT STATISTICS '[
  {
    "columns": ["a"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1,
    "distinct_count": 1
  }
]'

# Two rows in d; both have a = 1 and differ only in b.
statement ok
INSERT INTO d (a, b) VALUES (1, 1), (1, 2)

# Test that vectorized stats are collected correctly. The session is switched
# to vectorize=on and distsql=on for the EXPLAIN ANALYZE queries that follow.
statement ok
SET vectorize = on

statement ok
SET distsql = on

# Full scan of a@a_pkey: the expected output reports 2,001 rows decoded from KV
# and "missing stats" for the scan.
query T
EXPLAIN ANALYZE (DISTSQL) SELECT a FROM a
----
planning time: 10µs
execution time: 100µs
distribution: <hidden>
vectorized: <hidden>
plan type: custom
rows decoded from KV: 2,001 (16 KiB, 4,002 KVs, 2,001 gRPC calls)
maximum memory usage: <hidden>
network usage: <hidden>
regions: <hidden>
isolation level: serializable
priority: normal
quality of service: regular
·
• scan
  sql nodes: <hidden>
  kv nodes: <hidden>
  regions: <hidden>
  actual row count: 2,001
  KV time: 0µs
  KV contention time: 0µs
  KV rows decoded: 2,001
  KV pairs read: 4,002
  KV bytes read: 16 KiB
  KV gRPC calls: 2,001
  estimated max memory allocated: 0 B
  missing stats
  table: a@a_pkey
  spans: FULL SCAN
·
Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJyMUsFqGzEQvfcrxJxaUIg2lB50KnVSMKmbYJtcigmyNHGEtdJWM4ttzH5Wf6BfVrSyoWkb6ByWnTdv573h7RHoewANi5svN5OlMOLz_G4mDEiIyeFX0yKB_gYNrCR0OVkkSrlAx5EwdXvQSoKPXc8FXkmwKSPoI7DngKBhadYB52gc5ksFEhyy8WFcaz6ax26LB5AwSaFvI-lRe9GZ8noBEm4fRBEiLWJT24wbn8qYkbhC7FvUQv38QbW3KTJG9in-NcppR8KhTQ6dFldSqdPa9YGRREbjtGg-iFv_qeKb-f1EWBMCvaB3xucz_b1U6gokzB4mE0GMnbCpjyze4p4vfeR3Wqjx9EpA3L5GaM1etNimfBAmhGQNF5dKFC9rw_YZSaSeu561KPzxmjNQ3a0GCRU55UFsNgi6-S3A6TVoNcj_z3CO1KVI-CK-15TUH0oXzbCSgG6D9ceh1GeL9znZkVvbu3HRCDgkrtOmNtN4HhFnNG21v5LwFNLu0TvQoE518Y_HuaB8YDZUDls8p924dnnoiq0nEwglzMwWr5Extz56Ym9Bc-5xGN78CgAA___XMf2r

# Join c with d on b. The expected plan is a 'lookup join (streamer)' into
# d@d_pkey fed by a full scan of c@sec.
query T
EXPLAIN ANALYZE (DISTSQL) SELECT c.a FROM c JOIN d ON d.b = c.b
----
planning time: 10µs
execution time: 100µs
distribution: <hidden>
vectorized: <hidden>
plan type: custom
rows decoded from KV: 3 (24 B, 6 KVs, 3 gRPC calls)
maximum memory usage: <hidden>
network usage: <hidden>
regions: <hidden>
isolation level: serializable
priority: normal
quality of service: regular
·
• lookup join (streamer)
│ sql nodes: <hidden>
│ kv nodes: <hidden>
│ regions: <hidden>
│ actual row count: 2
│ KV time: 0µs
│ KV contention time: 0µs
│ KV rows decoded: 1
│ KV pairs read: 2
│ KV bytes read: 8 B
│ KV gRPC calls: 1
│ estimated max memory allocated: 0 B
│ table: d@d_pkey
│ equality: (b) = (b)
│
└── • scan
      sql nodes: <hidden>
      kv nodes: <hidden>
      regions: <hidden>
      actual row count: 2
      KV time: 0µs
      KV contention time: 0µs
      KV rows decoded: 2
      KV pairs read: 4
      KV bytes read: 16 B
      KV gRPC calls: 2
      estimated max memory allocated: 0 B
      estimated row count: 1 (100% of the table; stats collected <hidden> ago)
      table: c@sec
      spans: FULL SCAN
·
Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJy0VGFLG00Q_v7-imE-vYVV745SykIhNFqIVSNR_FKCbHbHuM3d7nV3DhMkP6t_oL-s7K0RNSot1PsQeJ558szcPMPdYvxRo8Szg6OD4TnoXQVfJuNj0HA4Hp2AgfEJmN0ZfAK9O0OBzhs6UQ1FlN-wxKnANnhNMfqQqNteMDJLlIVA69qOEz0VqH0glLfIlmtCiedqVtOElKGwV6BAQ6xs3dvqQSSNAoe-7hoXJSgBqfdZqxLaQYFfLyA1ihJcmWGgufWpzBQ5U2wbklD8-hkz1t4xObbebZWCv4lgSHtDRkKVydmKKUIgZSSUH-BzZueT0yFoVdfxXtgqGzbC9yjw-GI4hMjUgvadY_iflrxnHb-TUPQvmwVEi5cEjVpCQ40PK1B17bXiNFfRzzBTrK8pgu-47VhC0vfzb4gKp2uBGd1tP7KaE8ryQVyjfZTFWvx5YofeurvAyseBmYG5bBe0QoFH3i-6Fr5768A7CYPqYZApxXEacZAc-mb96vMmM46s6noroDdMu9xO--NzYZfbYVf_JOwukoHIgVRDAQXSknS3Pfgb3UT15CbKv7mJCcXWu0iP7uGlTsWTTjvleiqQzJzypyP6Lmg6DV732gzHvVFPGIqcq2UGI7cp5e3dn_RDp_JVp-o1p6nAq9rfXFqDEou7Z-eZn82D6Q9qHtOKzq79TW97vmrTC16pOpLAY7WgfWIKjXU2stUoOXS0Xv_3OwAA___2YsKZ

# Regression test for using the Streamer API when we have a cast to an Oid type
# for which DistSQL is prohibited (#122274). The query is the same join as
# above, except that c.a is cast to REGNAMESPACE. (Note that, unlike above, we
# don't have 'lookup join (streamer)' here - that's the test.)
query T
EXPLAIN ANALYZE (DISTSQL) SELECT c.a::REGNAMESPACE FROM c JOIN d ON d.b = c.b
----
planning time: 10µs
execution time: 100µs
distribution: <hidden>
vectorized: <hidden>
plan type: custom
rows decoded from KV: 3 (24 B, 6 KVs, 3 gRPC calls)
maximum memory usage: <hidden>
network usage: <hidden>
regions: <hidden>
isolation level: serializable
priority: normal
quality of service: regular
·
• render
│
└── • lookup join
    │ sql nodes: <hidden>
    │ kv nodes: <hidden>
    │ regions: <hidden>
    │ actual row count: 2
    │ KV time: 0µs
    │ KV contention time: 0µs
    │ KV rows decoded: 1
    │ KV pairs read: 2
    │ KV bytes read: 8 B
    │ KV gRPC calls: 1
    │ estimated max memory allocated: 0 B
    │ table: d@d_pkey
    │ equality: (b) = (b)
    │
    └── • scan
          sql nodes: <hidden>
          kv nodes: <hidden>
          regions: <hidden>
          actual row count: 2
          KV time: 0µs
          KV contention time: 0µs
          KV rows decoded: 2
          KV pairs read: 4
          KV bytes read: 16 B
          KV gRPC calls: 2
          estimated max memory allocated: 0 B
          estimated row count: 1 (100% of the table; stats collected <hidden> ago)
          table: c@sec
          spans: FULL SCAN
·
Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJy0lFFqGzEQht97imGeWlCSXVNKERScOm5xGtthHfJSTJCliaNaK20lLbEJPlYv0JOVXdkhjpPQQqMHw_zze2akb9g7DD8Ncpz0z_q9C5CHgvOi_3V0POxPzo97ffhSjIcg4XQ8GIGC8QjU4Qw-gTycIUPrFI1ESQH5d8xxyrDyTlIIzjfSXWsYqCXyjKG2VR0becpQOk_I7zDqaAg5XoiZoYKEIn-UIUNFUWjTlpXdQBIZ9pypSxs4CAZN70klmugAGX67hKZR4GDzFHqaa9ekI4WYpKhL4pD9_hVSLJ2NZKN2di_l3W0ARdIpUhw6SZytIgXwJBSH_AN8Tuq8OO-BFMaEe2MltN8a3yPD4WWvByFSBdLVNsJbWsYjbeM7Dll72WQgWjxnKMUSSiqdX4EwxkkRm7mydoaZiPKGArg6VnXk0Pjb-bdCB6drhinavH6IYk7I8we4BifIszX7e2KnTtsNsHwXmOqqq2pBK2R45tyiruCH0xac5dDtPATZUCzIKvIcuvnu3uGme8siPW2KQxTG7BF7Rfz5Pv6PT9HP9-l3_gt9WpKs9wd9paXoPFqK_F-WoqBQORtoZyGe65Q96nSQr6cMSc0pfTuCq72kc-9k603huC3UCopCTNk8BQO7TYXoSZT3O_2wUv5ipc5LlaYMr427vdIKOWabc_DEz_Zg8wcxD80TTW7cbVv2YlU1F7wWJhDDoVjQCUXypbY6RC2RR1_Tev3mTwAAAP__KdvF2w==

# Optimizer plan for the hinted merge join of c and d on c.a = d.b. The verbose
# output is expected to show the "force merge join" flag and the orderings
# required from both inputs.
query T
EXPLAIN (OPT, VERBOSE) SELECT c.a FROM c INNER MERGE JOIN d ON c.a = d.b
----
project
 ├── columns: a:1
 ├── stats: [rows=10]
 ├── cost: 1129.989
 ├── distribution: test
 ├── prune: (1)
 └── inner-join (merge)
      ├── columns: c.a:1 d.b:10
      ├── flags: force merge join
      ├── left ordering: +1
      ├── right ordering: +10
      ├── stats: [rows=10, distinct(1)=1, null(1)=0, distinct(10)=1, null(10)=0]
      ├── cost: 1129.869
      ├── fd: (1)==(10), (10)==(1)
      ├── distribution: test
      ├── scan c
      │    ├── columns: c.a:1
      │    ├── stats: [rows=1, distinct(1)=1, null(1)=0]
      │    ├── cost: 30.12
      │    ├── ordering: +1
      │    ├── distribution: test
      │    ├── prune: (1)
      │    ├── interesting orderings: (+1)
      │    └── unfiltered-cols: (1-8)
      ├── scan d
      │    ├── columns: d.b:10
      │    ├── stats: [rows=1000, distinct(10)=100, null(10)=0]
      │    ├── cost: 1088.62
      │    ├── ordering: +10
      │    ├── distribution: test
      │    ├── prune: (10)
      │    ├── interesting orderings: (+10)
      │    └── unfiltered-cols: (9-14)
      └── filters (true)

# Execute the same hinted merge join and check its execution stats; both inputs
# are full scans (c@c_pkey and d@d_pkey).
query T
EXPLAIN ANALYZE (DISTSQL) SELECT c.a FROM c INNER MERGE JOIN d ON c.a = d.b
----
planning time: 10µs
execution time: 100µs
distribution: <hidden>
vectorized: <hidden>
plan type: custom
rows decoded from KV: 4 (32 B, 8 KVs, 4 gRPC calls)
maximum memory usage: <hidden>
network usage: <hidden>
regions: <hidden>
isolation level: serializable
priority: normal
quality of service: regular
·
• merge join
│ sql nodes: <hidden>
│ regions: <hidden>
│ actual row count: 2
│ estimated max memory allocated: 0 B
│ estimated max sql temp disk usage: 0 B
│ equality: (a) = (b)
│
├── • scan
│     sql nodes: <hidden>
│     kv nodes: <hidden>
│     regions: <hidden>
│     actual row count: 2
│     KV time: 0µs
│     KV contention time: 0µs
│     KV rows decoded: 2
│     KV pairs read: 4
│     KV bytes read: 16 B
│     KV gRPC calls: 2
│     estimated max memory allocated: 0 B
│     estimated row count: 1 (100% of the table; stats collected <hidden> ago)
│     table: c@c_pkey
│     spans: FULL SCAN
│
└── • scan
      sql nodes: <hidden>
      kv nodes: <hidden>
      regions: <hidden>
      actual row count: 2
      KV time: 0µs
      KV contention time: 0µs
      KV rows decoded: 2
      KV pairs read: 4
      KV bytes read: 16 B
      KV gRPC calls: 2
      estimated max memory allocated: 0 B
      missing stats
      table: d@d_pkey
      spans: FULL SCAN
·
Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJzsk99O4zoQxu_PU4zmCnQMJOHoXFhCqk7pWZXdtqggblYVcu0hWHXsYDuiFepj7Qvsk62cAEvLnxUrLjcXVWbmy-fJ92vuMNwY5Hg2-DLon4PcF_D_dDICCcPxeDCF0WD6aQAnk-EYFEzGreAI1P4cGVqnaCwqCsi_Yo4zhrV3kkJwPrXuWsFQLZFnDLWtm5jaM4bSeUJ-h1FHQ8jxXMwNTUko8gcZMlQUhTatrezJy3pBK2TYd6apbOAgkOFZLdLtHjL8fAHpoMDB5l3pqdQujSOF2LWirohD9v1b6GrpbCQbtbPPRt7dBlAknSLFoeia81WkAJ6E4pD_C_913XJ62gcpjAmPwlpo_yD8BxmOLvp9CJFqkK6xEXZoGQ-0jbscsvZlOwHR4jVBJZZQUeX8CoQxToqY9sraHeYiymsK4JpYN5FD0rf7PzQKnK0ZdtV9-iGKkpDnT3ANj5Fna_Z7xPJNYqqnnhGb_yH2AcSKLWL5q8R-gmqs84o8qQ1Is_TkryQvYB-RL-nEaUv-oNjEbugq7vTyv3ePvC6vu1tkOEmv1EuUaUmyeY7vrazSLNwYiFTVoHRYQBNESR8Q5eFWlMV7_vxTCrWzgbYjffGkbOukvTwFS6qkDlRwjZd06p1stV05aY3ahqIQu2nRFUPbjvJ0gidRPX67T53ydzgVT53ybafiTafDDadsc6cZwyvjbi-1Qo7Z_bX3ws_DhekBUYYU9tm1u21tz1d1iupKmEAMR2JBxxTJV9rqELVEHn1D6_VfPwIAAP__d_EqNQ==

# Undo the vectorize and distsql session settings that were changed for the
# stats tests above.
statement ok
RESET vectorize; RESET distsql

# Make sure tracing is off before the trace-based test that follows.
statement ok
SET tracing=off

# Make sure that the colBatchScan operator can parallelize scans.
# This test is similar to the one in testplannerlogic/select.
statement ok
CREATE TABLE tpar (
  a INT PRIMARY KEY,
  item STRING,
  price FLOAT,
  FAMILY (a, item, price),
  UNIQUE INDEX item (item),
  UNIQUE INDEX p (price)
)

# Split tpar at a = 5 so that the keys a = 0 and a = 10 looked up below are on
# opposite sides of the split point.
statement ok
ALTER TABLE tpar SPLIT AT VALUES(5)

# Run a select to prime the range cache to simplify the trace below.
statement ok
SELECT * FROM tpar

# Make sure that the scan actually gets parallelized.
statement ok
SET tracing = on; SELECT * FROM tpar WHERE a = 0 OR a = 10

statement ok
SET tracing = off

# The span "sending partial batch" means that the scan was parallelized.
#
# Most of the time we're seeing duplicate "querying next range" entries because
# we first use the range cache to try to partition the spans in order to have
# parallel TableReaders (we end up with a single partition though), and then we
# have a single TableReader performing the scan of two spans in parallel.
# However, occasionally the duplicate "querying next range at /Table/109/1/10/0"
# message is either dropped entirely or replaced with another
# "querying next range at /Table/109/1/0/0". It's not clear why that happens, so
# we deduplicate the messages to make the test non-flaky.
query T rowsort
SELECT DISTINCT message FROM [SHOW TRACE FOR SESSION]
WHERE message LIKE 'querying next range at %' OR
      message = '=== SPAN START: kv.DistSender: sending partial batch ==='
----
querying next range at /Table/109/1/0/0
=== SPAN START: kv.DistSender: sending partial batch ===
querying next range at /Table/109/1/10/0

# Used to be a regression test for #46123 (rowexec.TableReader not implementing
# execopnode.OpNode interface).
statement ok
CREATE TABLE t46123(c0 INT)

# The expected plan consists only of colexec*/colfetcher operators, with the
# scan performed by colfetcher.ColBatchScan.
query T
EXPLAIN (VEC) SELECT stddev(0) FROM t46123 WHERE ('' COLLATE en)::BOOL
----
│
└ Node 1
  └ *colexec.orderedAggregator
    └ *colexecbase.constInt64Op
      └ *colexecbase.castDatumBoolOp
        └ *colexecbase.constDatumOp
          └ *colfetcher.ColBatchScan

# Regression test for #46122.
statement ok
CREATE TABLE t46122_0(c0 STRING); CREATE TABLE t46122_1(c0 STRING)

# The comma join without a join condition is expected to be planned as a
# vectorized cross join.
query T
EXPLAIN (VEC) SELECT t46122_0.c0 FROM t46122_0, t46122_1
----
│
└ Node 1
  └ *colexecjoin.crossJoiner
    ├ *colfetcher.ColBatchScan
    └ *colfetcher.ColBatchScan

# NOTE(review): presumably a regression test for #46404, judging by the table
# names - confirm.
statement ok
CREATE TABLE t46404_0(c0 INT); CREATE TABLE t46404_1(c0 INT)

# The aggregate argument is a "> ANY" comparison cast to INT; the expected plan
# evaluates it with defaultCmpRConstProjOp followed by castBoolIntOp.
query T
EXPLAIN (VEC) SELECT stddev((t46404_1.c0 > ANY (0, 0))::INT) FROM t46404_0, t46404_1 GROUP BY t46404_0.rowid
----
│
└ Node 1
  └ *colexec.hashAggregator
    └ *colexecbase.castBoolIntOp
      └ *colexecprojconst.defaultCmpRConstProjOp
        └ *colexecjoin.crossJoiner
          ├ *colfetcher.ColBatchScan
          └ *colfetcher.ColBatchScan

# Table used for the self-join in the fallback test below.
statement ok
CREATE TABLE xyz (
  x INT,
  y INT,
  z TEXT
)

# Check that we fall back gracefully to the row-by-row engine on a join type
# with an ON expression that we don't support. The expected plan uses
# rowexec.hashJoiner on top of two vectorized scans.
query T
EXPLAIN (VEC) SELECT * FROM xyz AS t1 FULL OUTER JOIN xyz AS t2 ON t1.x = t2.x AND t1.x + t2.x = 0
----
│
└ Node 1
  └ *rowexec.hashJoiner
    ├ *colfetcher.ColBatchScan
    └ *colfetcher.ColBatchScan

# Verify that the vectorized engine is used (there is a mismatch between
# argument type width and the result). Column c of table a is declared as INT4.
query T
EXPLAIN (VEC) SELECT max(c) FROM a
----
│
└ Node 1
  └ *colexec.orderedAggregator
    └ *colfetcher.ColBatchScan

# Verify that binary operations on integers of any width return INT8.
statement ok
CREATE TABLE ints (_int2 INT2, _int4 INT4, _int8 INT8);
INSERT INTO ints VALUES (1, 1, 1), (2, 2, 2)

# In the expected plan _int4 + _int4 is evaluated by projPlusInt32Int32Op and
# its result is then compared by selEQInt64Int64Op, i.e. as a 64-bit integer.
query T
EXPLAIN (VEC) SELECT _int2 * _int2 FROM ints WHERE _int4 + _int4 = _int8 + 2
----
│
└ Node 1
  └ *colexecproj.projMultInt16Int16Op
    └ *colexecsel.selEQInt64Int64Op
      └ *colexecprojconst.projPlusInt64Int64ConstOp
        └ *colexecproj.projPlusInt32Int32Op
          └ *colfetcher.ColBatchScan

# Check that joinReader core is wrapped into the plan when vectorize is set to
# `experimental_always` - that core is the only exception to disabling of
# wrapping. The EXPLAIN (VEC) below first shows that the plan for this join
# contains the row-by-row rowexec.joinReader.

query T
EXPLAIN (VEC) SELECT c.a FROM c JOIN d ON d.b = c.b
----
│
└ Node 1
  └ *rowexec.joinReader
    └ *colfetcher.ColBatchScan

statement ok
SET vectorize = experimental_always

# Run the same join with vectorize=experimental_always; the statement is
# expected to succeed.
statement ok
SELECT c.a FROM c JOIN d ON d.b = c.b

statement ok
RESET vectorize

# concat_agg over BYTES and STRING columns: with GROUP BY the expected plan
# uses the hash aggregator, and without it the ordered aggregator.
statement ok
CREATE TABLE bytes_string(_group INT, _bytes BYTES, _string STRING)

query T
EXPLAIN (VEC) SELECT concat_agg(_bytes), concat_agg(_string) FROM bytes_string GROUP BY _group
----
│
└ Node 1
  └ *colexec.hashAggregator
    └ *colfetcher.ColBatchScan

query T
EXPLAIN (VEC) SELECT concat_agg(_bytes), concat_agg(_string) FROM bytes_string
----
│
└ Node 1
  └ *colexec.orderedAggregator
    └ *colfetcher.ColBatchScan

# Single nullable INT column populated with a NULL and two non-NULL values.
statement ok
CREATE TABLE t63792 (c INT);
INSERT INTO t63792 VALUES (NULL), (1), (2)

# Check that casts of constants are pre-evaluated (which allows us to use
# colexec.isNullProjOp instead of colexecproj.defaultCmpProjOp).
query T
EXPLAIN (VEC) SELECT c = c FROM t63792
----
│
└ Node 1
  └ *colexec.orProjOp
    ├ *colfetcher.ColBatchScan
    ├ *colexec.isNullProjOp
    └ *colexecbase.castOpNullAny
      └ *colexecbase.constNullOp

# Regression test that we can run EXPLAIN (VEC) on a mutation that utilizes the
# vectorized engine for some internal operations (#66568).
statement ok
CREATE TABLE t66568 (c INT PRIMARY KEY);

# The expected plan mixes sql.planNodeToRowSource wrappers with the vectorized
# crossJoiner and ColBatchScan operators.
query T
EXPLAIN (VEC) INSERT INTO t66568 VALUES (1) ON CONFLICT DO NOTHING
----
│
└ Node 1
  └ *sql.planNodeToRowSource
    └ *colexecjoin.crossJoiner
      ├ *sql.planNodeToRowSource
      └ *colfetcher.ColBatchScan

# Single STRING column holding one NULL row.
statement ok
CREATE TABLE t_string (a STRING);
INSERT INTO t_string VALUES (NULL)

# Check that IN expression with non-constant right-hand side is handled via the
# default comparison operator. Here the non-constant element of the tuple is
# column a.
query T
EXPLAIN (VEC) SELECT 'b' IN ('b', a, 'a') FROM t_string
----
│
└ Node 1
  └ *colexecproj.defaultCmpProjOp
    └ *colexec.tupleProjOp
      └ *colexecbase.constBytesOp
        └ *colexecbase.constBytesOp
          └ *colexecbase.constBytesOp
            └ *colfetcher.ColBatchScan

# Regression test for calling Release() before Close() on a vectorized index
# joiner (#70000).
statement ok
CREATE TABLE table70000_1 (i INT PRIMARY KEY);
CREATE TABLE table70000_2 (f FLOAT, b BOOL, INDEX f_idx(f));

# The query forces the f_idx secondary index; the expected plan contains a
# colfetcher.ColIndexJoin underneath a sql.planNodeToRowSource wrapper.
query T
EXPLAIN (VEC)
  SELECT
    CASE WHEN b THEN (SELECT f FROM table70000_1 LIMIT 1) ELSE f END
  FROM
    table70000_2@f_idx;
----
│
└ Node 1
  └ *colexec.caseOp
    ├ *colexec.bufferOp
    │ └ *sql.planNodeToRowSource
    │   └ *colfetcher.ColIndexJoin
    │     └ *colfetcher.ColBatchScan
    ├ *colexec.bufferOp
    └ *colexec.bufferOp

# Regression test for releasing operators before closing them with EXPLAIN (VEC)
# (#70438).
statement ok
CREATE TABLE t70438 (k INT PRIMARY KEY, v INT, UNIQUE INDEX foo (v));
INSERT INTO t70438 VALUES (1, 2), (3, 4), (5, 6), (7, 8);

# The DELETE filters on k = 3 OR v = 6; the expected plan has two scans whose
# results are merged by an unordered synchronizer and then de-duplicated.
query T
EXPLAIN (VEC) DELETE FROM t70438 WHERE k=3 OR v=6
----
│
└ Node 1
  └ *sql.planNodeToRowSource
    └ *colexec.UnorderedDistinct
      └ *colexec.SerialUnorderedSynchronizer
        ├ *colfetcher.ColBatchScan
        └ *colfetcher.ColBatchScan

# Some tests for set-op cross joins. Tables t and u are declared without any
# columns and are populated through the implicit rowid column, so SELECT *
# produces zero-column rows. Both set operations below are expected to be
# planned as colexecjoin.crossJoiner.
statement ok
CREATE TABLE t ();
CREATE TABLE u ();
INSERT INTO t (rowid) VALUES (1), (2);
INSERT INTO u (rowid) VALUES (1);

query T
EXPLAIN (VEC) SELECT * FROM t INTERSECT ALL SELECT * FROM u
----
│
└ Node 1
  └ *colexecjoin.crossJoiner
    ├ *colfetcher.ColBatchScan
    └ *colfetcher.ColBatchScan

query T
EXPLAIN (VEC) SELECT * FROM t EXCEPT ALL SELECT * FROM u
----
│
└ Node 1
  └ *colexecjoin.crossJoiner
    ├ *colfetcher.ColBatchScan
    └ *colfetcher.ColBatchScan
