# LogicTest: local

# EXPLAIN test cases for using invertedJoiner on an inverted geospatial index.

# ltable is the table scanned as the input side of the joins below. It has an
# int primary key and two geometry columns; no index is declared on either
# geometry column.
statement ok
CREATE TABLE ltable(
  lk int primary key,
  geom1 geometry,
  geom2 geometry
)

# rtable is the indexed side of the joins below. Its primary key is the
# composite (rk1, rk2), and geom carries the inverted index geom_index that
# the inverted joins in the expected plans read from.
statement ok
CREATE TABLE rtable(
  rk1 int,
  geom geometry,
  rk2 string,
  PRIMARY KEY (rk1, rk2),
  INVERTED INDEX geom_index(geom)
)

# ST_Intersects join with the inverted index forced via the @geom_index hint.
# The expected plan scans ltable, feeds it to an inverted join on geom_index,
# and then to a lookup join on rtable's primary index keyed on (rk1, rk2),
# where ST_Intersects is evaluated as the join pred. EXPLAIN (DISTSQL) also
# emits a Diagram URL as the last line of output.
query T
EXPLAIN (DISTSQL)
SELECT lk, rk1 FROM ltable JOIN rtable@geom_index ON ST_Intersects(ltable.geom1, rtable.geom)
----
distribution: local
vectorized: true
·
• lookup join
│ table: rtable@rtable_pkey
│ equality: (rk1, rk2) = (rk1, rk2)
│ equality cols are key
│ pred: st_intersects(geom1, geom)
│
└── • inverted join
    │ table: rtable@geom_index
    │
    └── • scan
          missing stats
          table: ltable@ltable_pkey
          spans: FULL SCAN
·
Diagram: https://cockroachdb.github.io/distsqlplan/decode.html#eJyUklFv2j4Uxd__n8K6T0W6_IvDugc_ZSpMSkWhAx4mTQhlyS3zEuzMdlYqxHef7NAOQkGrHyC-vj4-v6O7BfurBAHDrw-jT8mYXQ2S2Xz2ZdRhs-FoeDtnZYHMFJx9nk7uWenS7yWxu0kyZiZ8xyvS66VUOW3YZMysW0rlyFjKnL1q2v_3LRz3F8KuAwhK5zRO12RBfAMOC4TK6Iys1caXtqEhyTcgeghSVbXz5QVCpg2B2IKTriQQMPeyU0pzMtc9QMjJpbIMso2BuPlbVgU9A8KtLuu1siKgBW-AMKtSX-rCYoega_f3OevSFYHgB_6SAYjeDv_dYqJ-k3GU32mpyFzzY5cnSQK-3hhuKtNKNY6QxTedQxJT-HyLqOEBhEntBIs5xhHGfYw_nMWKWlj8PVgeZx989CaSOQp-pHVRV-ynloppJVjjDOFkbgLhx1PCNtzNWax-Cyt6D9aUbKWVpSOkcy_1Wi91-W6BQPmKmim2ujYZPRidhd5mOwlCoZCTdc0pbzaJejmyzlC6fh22QyV-USk6r8TbStFFpf4lTwuEx1I_LWUOAnr71X3j52WBv5CurA979kM_Bdn5c-WjekxLSwj3aUEDcmTWUknrZAbCmZp2u__-BAAA___c-4Ne

# The same hinted ST_Intersects join under plain EXPLAIN, this time also
# selecting rk2 and rtable.geom. The expected plan shape is still
# scan -> inverted join -> lookup join.
query T
EXPLAIN SELECT lk, rk1, rk2, rtable.geom
FROM ltable JOIN rtable@geom_index ON ST_Intersects(ltable.geom1, rtable.geom)
----
distribution: local
vectorized: true
·
• lookup join
│ table: rtable@rtable_pkey
│ equality: (rk1, rk2) = (rk1, rk2)
│ equality cols are key
│ pred: st_intersects(geom1, geom)
│
└── • inverted join
    │ table: rtable@geom_index
    │
    └── • scan
          missing stats
          table: ltable@ltable_pkey
          spans: FULL SCAN

# ST_DWithin with a constant distance is also planned as an inverted join
# followed by a lookup join. The integer literal 5 in the query shows up as
# 5.0 in the lookup join's pred.
query T
EXPLAIN SELECT lk, rk1, rk2, rtable.geom
FROM ltable JOIN rtable@geom_index ON ST_DWithin(ltable.geom1, rtable.geom, 5)
----
distribution: local
vectorized: true
·
• lookup join
│ table: rtable@rtable_pkey
│ equality: (rk1, rk2) = (rk1, rk2)
│ equality cols are key
│ pred: st_dwithin(geom1, geom, 5.0)
│
└── • inverted join
    │ table: rtable@geom_index
    │
    └── • scan
          missing stats
          table: ltable@ltable_pkey
          spans: FULL SCAN

# Disjunction (OR) of two geospatial predicates. ST_Intersects is written with
# the indexed column (rtable.geom) as its first argument; in the expected
# inverted expr both functions have geom1 first and geom_inverted_key standing
# in for rtable.geom, while the lookup join's pred keeps the argument order
# from the query. The scan already provides +lk ordering, so the final sort for
# ORDER BY (lk, rk1) reports "already ordered: +lk".
query T
EXPLAIN (VERBOSE)
SELECT lk, rk1 FROM ltable JOIN rtable@geom_index
ON ST_Intersects(rtable.geom, ltable.geom1) OR ST_DWithin(ltable.geom1, rtable.geom, 2) ORDER BY (lk, rk1)
----
distribution: local
vectorized: true
·
• sort
│ columns: (lk, rk1)
│ estimated row count: 326,700 (missing stats)
│ order: +lk,+rk1
│ already ordered: +lk
│
└── • project
    │ columns: (lk, rk1)
    │ ordering: +lk
    │
    └── • project
        │ columns: (lk, geom1, rk1, geom)
        │ ordering: +lk
        │
        └── • lookup join (inner)
            │ columns: (lk, geom1, rk1, rk2, rk1, geom)
            │ ordering: +lk
            │ estimated row count: 326,700 (missing stats)
            │ table: rtable@rtable_pkey
            │ equality: (rk1, rk2) = (rk1, rk2)
            │ equality cols are key
            │ pred: st_intersects(geom, geom1) OR st_dwithin(geom1, geom, 2.0)
            │
            └── • project
                │ columns: (lk, geom1, rk1, rk2)
                │ ordering: +lk
                │
                └── • inverted join (inner)
                    │ columns: (lk, geom1, rk1, rk2, geom_inverted_key)
                    │ ordering: +lk
                    │ estimated row count: 10,000 (missing stats)
                    │ table: rtable@geom_index
                    │ inverted expr: st_intersects(geom1, geom_inverted_key) OR st_dwithin(geom1, geom_inverted_key, 2.0)
                    │
                    └── • scan
                          columns: (lk, geom1)
                          ordering: +lk
                          estimated row count: 1,000 (missing stats)
                          table: ltable@ltable_pkey
                          spans: FULL SCAN

# Conjunction (AND) of two geospatial predicates. Here ST_DWithin is the one
# written with the indexed column first; the expected inverted expr again has
# geom1 first in both functions with geom_inverted_key standing in for
# rtable.geom, while the lookup join's pred keeps the argument order from the
# query.
query T
EXPLAIN (VERBOSE)
SELECT lk, rk1 FROM ltable JOIN rtable@geom_index
ON ST_Intersects(ltable.geom1, rtable.geom) AND ST_DWithin(rtable.geom, ltable.geom1, 2) ORDER BY (lk, rk1)
----
distribution: local
vectorized: true
·
• sort
│ columns: (lk, rk1)
│ estimated row count: 9,801 (missing stats)
│ order: +lk,+rk1
│ already ordered: +lk
│
└── • project
    │ columns: (lk, rk1)
    │ ordering: +lk
    │
    └── • project
        │ columns: (lk, geom1, rk1, geom)
        │ ordering: +lk
        │
        └── • lookup join (inner)
            │ columns: (lk, geom1, rk1, rk2, rk1, geom)
            │ ordering: +lk
            │ estimated row count: 9,801 (missing stats)
            │ table: rtable@rtable_pkey
            │ equality: (rk1, rk2) = (rk1, rk2)
            │ equality cols are key
            │ pred: st_intersects(geom1, geom) AND st_dwithin(geom, geom1, 2.0)
            │
            └── • project
                │ columns: (lk, geom1, rk1, rk2)
                │ ordering: +lk
                │
                └── • inverted join (inner)
                    │ columns: (lk, geom1, rk1, rk2, geom_inverted_key)
                    │ ordering: +lk
                    │ estimated row count: 10,000 (missing stats)
                    │ table: rtable@geom_index
                    │ inverted expr: st_intersects(geom1, geom_inverted_key) AND st_dwithin(geom1, geom_inverted_key, 2.0)
                    │
                    └── • scan
                          columns: (lk, geom1)
                          ordering: +lk
                          estimated row count: 1,000 (missing stats)
                          table: ltable@ltable_pkey
                          spans: FULL SCAN

# Nested AND/OR expression that uses both geometry columns of ltable and one
# term, ST_Intersects('POINT(1.0 1.0)', rtable.geom), that compares rtable.geom
# with a constant instead of an ltable column. The whole expression, including
# the constant term, appears in the expected inverted expr; the constant is
# printed in the plan as a hex string rather than as the original WKT text.
query T
EXPLAIN (VERBOSE)
SELECT lk, rk1 FROM ltable JOIN rtable@geom_index
ON ST_Intersects(ltable.geom1, rtable.geom) AND ST_Covers(ltable.geom2, rtable.geom)
AND (ST_DFullyWithin(rtable.geom, ltable.geom1, 100) OR ST_Intersects('POINT(1.0 1.0)', rtable.geom))
----
distribution: local
vectorized: true
·
• project
│ columns: (lk, rk1)
│
└── • project
    │ columns: (lk, geom1, geom2, rk1, geom)
    │
    └── • lookup join (inner)
        │ columns: (lk, geom1, geom2, rk1, rk2, rk1, geom)
        │ estimated row count: 3,267 (missing stats)
        │ table: rtable@rtable_pkey
        │ equality: (rk1, rk2) = (rk1, rk2)
        │ equality cols are key
        │ pred: (st_intersects(geom1, geom) AND st_covers(geom2, geom)) AND (st_dfullywithin(geom, geom1, 100.0) OR st_intersects('0101000000000000000000F03F000000000000F03F', geom))
        │
        └── • project
            │ columns: (lk, geom1, geom2, rk1, rk2)
            │
            └── • inverted join (inner)
                │ columns: (lk, geom1, geom2, rk1, rk2, geom_inverted_key)
                │ estimated row count: 10,000 (missing stats)
                │ table: rtable@geom_index
                │ inverted expr: (st_intersects(geom1, geom_inverted_key) AND st_covers(geom2, geom_inverted_key)) AND (st_dfullywithin(geom1, geom_inverted_key, 100.0) OR st_intersects('0101000000000000000000F03F000000000000F03F', geom_inverted_key))
                │
                └── • scan
                      columns: (lk, geom1, geom2)
                      estimated row count: 1,000 (missing stats)
                      table: ltable@ltable_pkey
                      spans: FULL SCAN

# This query performs a semi-join, which is converted to paired joins by the
# optimizer. No index hint is given, yet the expected plan still reads
# rtable@geom_index. The first join of the pair is an inner inverted join that
# outputs an extra "cont" column, and the second is the lookup join (semi) that
# evaluates the ST_Intersects pred.
query T
EXPLAIN (VERBOSE)
SELECT lk FROM ltable WHERE EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))
----
distribution: local
vectorized: true
·
• project
│ columns: (lk)
│
└── • project
    │ columns: (lk, geom2)
    │
    └── • lookup join (semi)
        │ columns: (lk, geom2, rk1, rk2, cont)
        │ estimated row count: 10 (missing stats)
        │ table: rtable@rtable_pkey
        │ equality: (rk1, rk2) = (rk1, rk2)
        │ equality cols are key
        │ pred: st_intersects(geom2, geom)
        │
        └── • project
            │ columns: (lk, geom2, rk1, rk2, cont)
            │
            └── • inverted join (inner)
                │ columns: (lk, geom2, rk1, rk2, geom_inverted_key, cont)
                │ estimated row count: 10,000 (missing stats)
                │ table: rtable@geom_index
                │ inverted expr: st_intersects(geom2, geom_inverted_key)
                │
                └── • scan
                      columns: (lk, geom2)
                      estimated row count: 1,000 (missing stats)
                      table: ltable@ltable_pkey
                      spans: FULL SCAN

# Left outer joins are also converted to paired joins by the optimizer. In the
# expected plan both the inverted join and the lookup join are left outer
# joins, and the "cont" column is passed from the first join to the second.
query T
EXPLAIN (VERBOSE)
SELECT lk, rk1 FROM ltable LEFT JOIN rtable ON ST_Intersects(ltable.geom1, rtable.geom)
----
distribution: local
vectorized: true
·
• project
│ columns: (lk, rk1)
│
└── • project
    │ columns: (lk, geom1, rk1, geom)
    │
    └── • lookup join (left outer)
        │ columns: (lk, geom1, rk1, rk2, cont, rk1, geom)
        │ estimated row count: 10,000 (missing stats)
        │ table: rtable@rtable_pkey
        │ equality: (rk1, rk2) = (rk1, rk2)
        │ equality cols are key
        │ pred: st_intersects(geom1, geom)
        │
        └── • project
            │ columns: (lk, geom1, rk1, rk2, cont)
            │
            └── • inverted join (left outer)
                │ columns: (lk, geom1, rk1, rk2, geom_inverted_key, cont)
                │ estimated row count: 10,000 (missing stats)
                │ table: rtable@geom_index
                │ inverted expr: st_intersects(geom1, geom_inverted_key)
                │
                └── • scan
                      columns: (lk, geom1)
                      estimated row count: 1,000 (missing stats)
                      table: ltable@ltable_pkey
                      spans: FULL SCAN

# Left join whose input is a CTE rather than a base table. The CTE q is
# referenced twice: as the left side of the join and inside a scalar subquery
# in the SELECT list. The expected plan evaluates q once into a buffer
# (label: buffer 1 (q), subquery @S1), and both the paired left outer joins and
# the count subquery (@S2) read it through "scan buffer" nodes. The lk > 2
# filter of the CTE shows up as the span /3- on the ltable scan.
query T
EXPLAIN (VERBOSE)
WITH q AS (
  SELECT * FROM ltable WHERE lk > 2
)
SELECT count(*), (SELECT count(*) FROM q) FROM (
  SELECT lk, rk1
  FROM q
  LEFT JOIN rtable ON ST_Intersects(q.geom1, rtable.geom)
) GROUP BY lk
----
distribution: local
vectorized: true
·
• root
│ columns: (count, count)
│
├── • render
│   │ columns: (count, count)
│   │ render count: @S2
│   │ render count_rows: count_rows
│   │
│   └── • group (hash)
│       │ columns: (lk, count_rows)
│       │ estimated row count: 333 (missing stats)
│       │ aggregate 0: count_rows()
│       │ group by: lk
│       │
│       └── • project
│           │ columns: (lk)
│           │
│           └── • project
│               │ columns: (lk, geom1, geom)
│               │
│               └── • lookup join (left outer)
│                   │ columns: (lk, geom1, rk1, rk2, cont, geom)
│                   │ estimated row count: 3,333 (missing stats)
│                   │ table: rtable@rtable_pkey
│                   │ equality: (rk1, rk2) = (rk1, rk2)
│                   │ equality cols are key
│                   │ pred: st_intersects(geom1, geom)
│                   │
│                   └── • project
│                       │ columns: (lk, geom1, rk1, rk2, cont)
│                       │
│                       └── • inverted join (left outer)
│                           │ columns: (lk, geom1, rk1, rk2, geom_inverted_key, cont)
│                           │ estimated row count: 3,333 (missing stats)
│                           │ table: rtable@geom_index
│                           │ inverted expr: st_intersects(geom1, geom_inverted_key)
│                           │
│                           └── • project
│                               │ columns: (lk, geom1)
│                               │
│                               └── • scan buffer
│                                     columns: (lk, geom1, geom2)
│                                     estimated row count: 333 (missing stats)
│                                     label: buffer 1 (q)
│
├── • subquery
│   │ id: @S1
│   │ original sql: SELECT * FROM ltable WHERE lk > 2
│   │ exec mode: discard all rows
│   │
│   └── • buffer
│       │ columns: (lk, geom1, geom2)
│       │ label: buffer 1 (q)
│       │
│       └── • scan
│             columns: (lk, geom1, geom2)
│             estimated row count: 333 (missing stats)
│             table: ltable@ltable_pkey
│             spans: /3-
│
└── • subquery
    │ id: @S2
    │ original sql: (SELECT count(*) FROM q)
    │ exec mode: one row
    │
    └── • group (scalar)
        │ columns: (count_rows)
        │ estimated row count: 1 (missing stats)
        │ aggregate 0: count_rows()
        │
        └── • project
            │ columns: ()
            │
            └── • scan buffer
                  columns: (lk, geom1, geom2)
                  estimated row count: 333 (missing stats)
                  label: buffer 1 (q)

# Anti joins are also converted to paired joins by the optimizer. In the
# expected plan the first join of the pair is a left outer inverted join that
# outputs the "cont" column, and the second is the lookup join (anti) that
# evaluates the ST_Intersects pred.
query T
EXPLAIN (VERBOSE)
SELECT lk FROM ltable WHERE NOT EXISTS (SELECT * FROM rtable WHERE ST_Intersects(ltable.geom2, rtable.geom))
----
distribution: local
vectorized: true
·
• project
│ columns: (lk)
│
└── • project
    │ columns: (lk, geom2)
    │
    └── • lookup join (anti)
        │ columns: (lk, geom2, rk1, rk2, cont)
        │ estimated row count: 990 (missing stats)
        │ table: rtable@rtable_pkey
        │ equality: (rk1, rk2) = (rk1, rk2)
        │ equality cols are key
        │ pred: st_intersects(geom2, geom)
        │
        └── • project
            │ columns: (lk, geom2, rk1, rk2, cont)
            │
            └── • inverted join (left outer)
                │ columns: (lk, geom2, rk1, rk2, geom_inverted_key, cont)
                │ estimated row count: 10,000 (missing stats)
                │ table: rtable@geom_index
                │ inverted expr: st_intersects(geom2, geom_inverted_key)
                │
                └── • scan
                      columns: (lk, geom2)
                      estimated row count: 1,000 (missing stats)
                      table: ltable@ltable_pkey
                      spans: FULL SCAN

# Anti join whose subquery combines ST_Covers with two non-geospatial filters,
# one on the outer table (lk > 5) and one on rtable's key column (rk1 > 12).
# In the expected plan both filters are attached to the left outer inverted
# join as its "on" condition, and the lookup join (anti) evaluates only the
# st_covers pred.
query T
EXPLAIN (VERBOSE)
SELECT lk FROM ltable
WHERE NOT EXISTS (
  SELECT * FROM rtable WHERE ST_Covers(ltable.geom1, rtable.geom) AND lk > 5 AND rk1 > 12
)
----
distribution: local
vectorized: true
·
• project
│ columns: (lk)
│
└── • project
    │ columns: (lk, geom1)
    │
    └── • lookup join (anti)
        │ columns: (lk, geom1, rk1, rk2, cont)
        │ estimated row count: 997 (missing stats)
        │ table: rtable@rtable_pkey
        │ equality: (rk1, rk2) = (rk1, rk2)
        │ equality cols are key
        │ pred: st_covers(geom1, geom)
        │
        └── • project
            │ columns: (lk, geom1, rk1, rk2, cont)
            │
            └── • inverted join (left outer)
                │ columns: (lk, geom1, rk1, rk2, geom_inverted_key, cont)
                │ estimated row count: 1,111 (missing stats)
                │ table: rtable@geom_index
                │ inverted expr: st_covers(geom1, geom_inverted_key)
                │ on: (lk > 5) AND (rk1 > 12)
                │
                └── • scan
                      columns: (lk, geom1)
                      estimated row count: 1,000 (missing stats)
                      table: ltable@ltable_pkey
                      spans: FULL SCAN

# Bounding box operations. The tests that follow join on the ~ and &&
# operators; this cluster setting is presumably what makes those operators
# available (TODO confirm against the setting's documentation).
statement ok
SET CLUSTER SETTING sql.spatial.experimental_box2d_comparison_operators.enabled = on

# ~ with the ltable column on the left and the indexed column on the right.
# The expected inverted expr is st_covers(geom1, geom_inverted_key), while the
# lookup join's pred keeps the original operator (geom1 ~ geom).
query T
EXPLAIN (VERBOSE)
SELECT lk, rk1, rk2 FROM ltable JOIN rtable@geom_index ON ltable.geom1 ~ rtable.geom
----
distribution: local
vectorized: true
·
• project
│ columns: (lk, rk1, rk2)
│
└── • project
    │ columns: (lk, geom1, rk1, geom, rk2)
    │
    └── • lookup join (inner)
        │ columns: (lk, geom1, rk1, rk2, rk1, geom, rk2)
        │ estimated row count: 9,801 (missing stats)
        │ table: rtable@rtable_pkey
        │ equality: (rk1, rk2) = (rk1, rk2)
        │ equality cols are key
        │ pred: geom1 ~ geom
        │
        └── • project
            │ columns: (lk, geom1, rk1, rk2)
            │
            └── • inverted join (inner)
                │ columns: (lk, geom1, rk1, rk2, geom_inverted_key)
                │ estimated row count: 10,000 (missing stats)
                │ table: rtable@geom_index
                │ inverted expr: st_covers(geom1, geom_inverted_key)
                │
                └── • scan
                      columns: (lk, geom1)
                      estimated row count: 1,000 (missing stats)
                      table: ltable@ltable_pkey
                      spans: FULL SCAN

# ~ with the operands swapped: the indexed column is on the left. The expected
# inverted expr becomes st_coveredby(geom1, geom_inverted_key), and the lookup
# join's pred keeps the original operator (geom ~ geom1).
query T
EXPLAIN (VERBOSE)
SELECT lk, rk1, rk2 FROM ltable JOIN rtable@geom_index ON rtable.geom ~ ltable.geom1
----
distribution: local
vectorized: true
·
• project
│ columns: (lk, rk1, rk2)
│
└── • project
    │ columns: (lk, geom1, rk1, geom, rk2)
    │
    └── • lookup join (inner)
        │ columns: (lk, geom1, rk1, rk2, rk1, geom, rk2)
        │ estimated row count: 9,801 (missing stats)
        │ table: rtable@rtable_pkey
        │ equality: (rk1, rk2) = (rk1, rk2)
        │ equality cols are key
        │ pred: geom ~ geom1
        │
        └── • project
            │ columns: (lk, geom1, rk1, rk2)
            │
            └── • inverted join (inner)
                │ columns: (lk, geom1, rk1, rk2, geom_inverted_key)
                │ estimated row count: 10,000 (missing stats)
                │ table: rtable@geom_index
                │ inverted expr: st_coveredby(geom1, geom_inverted_key)
                │
                └── • scan
                      columns: (lk, geom1)
                      estimated row count: 1,000 (missing stats)
                      table: ltable@ltable_pkey
                      spans: FULL SCAN

# && with the indexed column on the left. The expected inverted expr is
# st_intersects(geom1, geom_inverted_key), and the lookup join's pred keeps
# the original operator (geom && geom1).
query T
EXPLAIN (VERBOSE)
SELECT lk, rk1, rk2 FROM ltable JOIN rtable@geom_index ON rtable.geom && ltable.geom1
----
distribution: local
vectorized: true
·
• project
│ columns: (lk, rk1, rk2)
│
└── • project
    │ columns: (lk, geom1, rk1, geom, rk2)
    │
    └── • lookup join (inner)
        │ columns: (lk, geom1, rk1, rk2, rk1, geom, rk2)
        │ estimated row count: 9,801 (missing stats)
        │ table: rtable@rtable_pkey
        │ equality: (rk1, rk2) = (rk1, rk2)
        │ equality cols are key
        │ pred: geom && geom1
        │
        └── • project
            │ columns: (lk, geom1, rk1, rk2)
            │
            └── • inverted join (inner)
                │ columns: (lk, geom1, rk1, rk2, geom_inverted_key)
                │ estimated row count: 10,000 (missing stats)
                │ table: rtable@geom_index
                │ inverted expr: st_intersects(geom1, geom_inverted_key)
                │
                └── • scan
                      columns: (lk, geom1)
                      estimated row count: 1,000 (missing stats)
                      table: ltable@ltable_pkey
                      spans: FULL SCAN

# g is a single table with an int primary key and one geometry column. The
# remaining tests join it with itself.
statement ok
CREATE TABLE g (
  k INT PRIMARY KEY,
  geom GEOMETRY
)

# Inverted index on g.geom, created with a separate statement rather than
# inline in CREATE TABLE. The tests below reference it by name as g@foo_inv.
statement ok
CREATE INVERTED INDEX foo_inv ON g(geom)

# This query performs an inverted join. It is a self-join of g written with
# comma-join syntax: g1 is forced to the inverted index (g@foo_inv) and g2 to
# the primary index (g@g_pkey). The expected plan scans g@g_pkey, runs an
# inverted join against g@foo_inv, and then a lookup join on g@g_pkey that
# evaluates the st_contains pred.
query T
EXPLAIN SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@g_pkey AS g2 WHERE ST_Contains(g1.geom, g2.geom) ORDER BY g1.k, g2.k
----
distribution: local
vectorized: true
·
• sort
│ order: +k,+k
│
└── • lookup join
    │ table: g@g_pkey
    │ equality: (k) = (k)
    │ equality cols are key
    │ pred: st_contains(geom, geom)
    │
    └── • inverted join
        │ table: g@foo_inv
        │
        └── • scan
              missing stats
              table: g@g_pkey
              spans: FULL SCAN

# The same self-join with both sides forced to the primary index (g@g_pkey),
# so the inverted index is not used. The expected plan is a cross join over two
# full scans, with st_contains applied as the cross join's pred (there is no
# separate filter node in this plan).
query T
EXPLAIN SELECT g1.k, g2.k FROM g@g_pkey AS g1, g@g_pkey AS g2 WHERE ST_Contains(g1.geom, g2.geom) ORDER BY g1.k, g2.k
----
distribution: local
vectorized: true
·
• sort
│ order: +k,+k
│
└── • cross join
    │ pred: st_contains(geom, geom)
    │
    ├── • scan
    │     missing stats
    │     table: g@g_pkey
    │     spans: FULL SCAN
    │
    └── • scan
          missing stats
          table: g@g_pkey
          spans: FULL SCAN

# This query performs an inverted join with an additional filter. Besides the
# join predicate there is an ST_Contains of g1.geom against a constant polygon
# and a key filter g2.k < 20. In the expected plan the constant ST_Contains is
# part of the lookup join's pred (the polygon is printed as a hex string), and
# the key filter becomes the scan span [ - /19].
query T
EXPLAIN SELECT g1.k, g2.k FROM g@foo_inv AS g1, g@g_pkey AS g2
WHERE ST_Contains(g1.geom, g2.geom)
  AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry))
  AND g2.k < 20
ORDER BY g1.k, g2.k
----
distribution: local
vectorized: true
·
• sort
│ order: +k,+k
│
└── • lookup join
    │ table: g@g_pkey
    │ equality: (k) = (k)
    │ equality cols are key
    │ pred: st_contains(geom, geom) AND st_contains(geom, '010300000001000000050000000000000000000000000000000000000000000000000000000000000000001440000000000000144000000000000014400000000000001440000000000000000000000000000000000000000000000000')
    │
    └── • inverted join
        │ table: g@foo_inv
        │
        └── • scan
              missing stats
              table: g@g_pkey
              spans: [ - /19]

# This query performs a cross join followed by a filter. It is the same query
# as the preceding test, but with both sides forced to the primary index
# (g@g_pkey). In the expected plan one input of the cross join is a scan
# limited to the span [ - /19] (from g2.k < 20), the other is a full scan
# wrapped in a filter node holding the constant ST_Contains, and the join
# predicate itself is the cross join's pred.
query T
EXPLAIN SELECT g1.k, g2.k FROM g@g_pkey AS g1, g@g_pkey AS g2
WHERE ST_Contains(g1.geom, g2.geom)
  AND ST_Contains(g1.geom, ST_MakePolygon('LINESTRING(0 0, 0 5, 5 5, 5 0, 0 0)'::geometry))
  AND g2.k < 20
ORDER BY g1.k, g2.k
----
distribution: local
vectorized: true
·
• sort
│ order: +k,+k
│
└── • cross join
    │ pred: st_contains(geom, geom)
    │
    ├── • scan
    │     missing stats
    │     table: g@g_pkey
    │     spans: [ - /19]
    │
    └── • filter
        │ filter: st_contains(geom, '010300000001000000050000000000000000000000000000000000000000000000000000000000000000001440000000000000144000000000000014400000000000001440000000000000000000000000000000000000000000000000')
        │
        └── • scan
              missing stats
              table: g@g_pkey
              spans: FULL SCAN
