# Schema for the first group of tests. abcd has a secondary index on (a, b)
# that does not include c, so a query that also needs c cannot be answered
# from that index alone.
exec-ddl
CREATE TABLE abcd (a INT, b INT, c INT, INDEX (a,b))
----

# small is the table joined against abcd in the tests below.
exec-ddl
CREATE TABLE small (m INT, n INT)
----

# Give small 10 rows with 10 distinct values of m. Only column m gets an
# injected statistic here.
exec-ddl
ALTER TABLE small INJECT STATISTICS '[
  {
    "columns": ["m"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10,
    "distinct_count": 10,
    "avg_size": 2
  }
]'
----

# We can only test lookup join stat generation when using non-covering indexes
# (that's when we create a group with LookupJoin). Compare the statistics of
# the inner lookup join with those of the top-level join; they should be in
# the same ballpark.
#
# The query selects c, which is not in the (a, b) index, so the expected plan
# is a lookup join into abcd_a_b_idx wrapped in a second lookup join into the
# primary index of abcd (keyed on rowid).

opt
SELECT * FROM small JOIN abcd ON a=m
----
inner-join (lookup abcd)
 ├── columns: m:1(int!null) n:2(int) a:6(int!null) b:7(int) c:8(int)
 ├── key columns: [9] = [9]
 ├── lookup columns are key
 ├── stats: [rows=99, distinct(1)=10, null(1)=0, distinct(6)=10, null(6)=0]
 ├── fd: (1)==(6), (6)==(1)
 ├── inner-join (lookup abcd@abcd_a_b_idx)
 │    ├── columns: m:1(int!null) n:2(int) a:6(int!null) b:7(int) abcd.rowid:9(int!null)
 │    ├── key columns: [1] = [6]
 │    ├── stats: [rows=99, distinct(1)=10, null(1)=0, distinct(6)=10, null(6)=0]
 │    ├── fd: (9)-->(6,7), (1)==(6), (6)==(1)
 │    ├── scan small
 │    │    ├── columns: m:1(int) n:2(int)
 │    │    └── stats: [rows=10, distinct(1)=10, null(1)=0]
 │    └── filters (true)
 └── filters (true)

# Filter on n, a column of the left (input) side only. In the expected plan it
# is applied as a select on top of the scan of small, below both lookup joins;
# neither join carries it in its ON condition (both show "filters (true)").
# The reduced input row count (9.9) is what drives the join estimates.
opt
SELECT * FROM small JOIN abcd ON a=m WHERE n > 2
----
inner-join (lookup abcd)
 ├── columns: m:1(int!null) n:2(int!null) a:6(int!null) b:7(int) c:8(int)
 ├── key columns: [9] = [9]
 ├── lookup columns are key
 ├── stats: [rows=98.01, distinct(1)=9.9, null(1)=0, distinct(6)=9.9, null(6)=0]
 ├── fd: (1)==(6), (6)==(1)
 ├── inner-join (lookup abcd@abcd_a_b_idx)
 │    ├── columns: m:1(int!null) n:2(int!null) a:6(int!null) b:7(int) abcd.rowid:9(int!null)
 │    ├── key columns: [1] = [6]
 │    ├── stats: [rows=98.01, distinct(1)=9.9, null(1)=0, distinct(6)=9.9, null(6)=0]
 │    ├── fd: (9)-->(6,7), (1)==(6), (6)==(1)
 │    ├── select
 │    │    ├── columns: m:1(int) n:2(int!null)
 │    │    ├── stats: [rows=9.9, distinct(1)=9.9, null(1)=0, distinct(2)=1, null(2)=0]
 │    │    ├── scan small
 │    │    │    ├── columns: m:1(int) n:2(int)
 │    │    │    └── stats: [rows=10, distinct(1)=10, null(1)=0, distinct(2)=1, null(2)=0.1]
 │    │    └── filters
 │    │         └── n:2 > 2 [type=bool, outer=(2), constraints=(/2: [/3 - ]; tight)]
 │    └── filters (true)
 └── filters (true)

# Filter that applies to the right side and gets pulled back into the ON
# condition. b is part of the (a, b) index, so in the expected plan b > 2
# appears in the lookup expression of the join on abcd_a_b_idx, alongside the
# equality m = a.
opt
SELECT * FROM small JOIN abcd ON a=m WHERE b > 2
----
inner-join (lookup abcd)
 ├── columns: m:1(int!null) n:2(int) a:6(int!null) b:7(int!null) c:8(int)
 ├── key columns: [9] = [9]
 ├── lookup columns are key
 ├── stats: [rows=33.58237, distinct(1)=10, null(1)=0, distinct(6)=10, null(6)=0]
 ├── fd: (1)==(6), (6)==(1)
 ├── inner-join (lookup abcd@abcd_a_b_idx)
 │    ├── columns: m:1(int!null) n:2(int) a:6(int!null) b:7(int!null) abcd.rowid:9(int!null)
 │    ├── lookup expression
 │    │    └── filters
 │    │         ├── b:7 > 2 [type=bool, outer=(7), constraints=(/7: [/3 - ]; tight)]
 │    │         └── m:1 = a:6 [type=bool, outer=(1,6), constraints=(/1: (/NULL - ]; /6: (/NULL - ]), fd=(1)==(6), (6)==(1)]
 │    ├── stats: [rows=33, distinct(1)=10, null(1)=0, distinct(6)=10, null(6)=0, distinct(7)=33, null(7)=0]
 │    ├── fd: (9)-->(6,7), (1)==(6), (6)==(1)
 │    ├── scan small
 │    │    ├── columns: m:1(int) n:2(int)
 │    │    └── stats: [rows=10, distinct(1)=10, null(1)=0]
 │    └── filters (true)
 └── filters (true)

# Filter on c, which is not part of the secondary index, so it cannot be
# applied by the lookup into abcd_a_b_idx. In the expected plan it is a filter
# of the top-level lookup join into the primary index of abcd; the inner join
# keeps its unfiltered estimate (rows=99).
opt
SELECT * FROM small JOIN abcd ON a=m WHERE c>2
----
inner-join (lookup abcd)
 ├── columns: m:1(int!null) n:2(int) a:6(int!null) b:7(int) c:8(int!null)
 ├── key columns: [9] = [9]
 ├── lookup columns are key
 ├── stats: [rows=33.58237, distinct(1)=10, null(1)=0, distinct(6)=10, null(6)=0]
 ├── fd: (1)==(6), (6)==(1)
 ├── inner-join (lookup abcd@abcd_a_b_idx)
 │    ├── columns: m:1(int!null) n:2(int) a:6(int!null) b:7(int) abcd.rowid:9(int!null)
 │    ├── key columns: [1] = [6]
 │    ├── stats: [rows=99, distinct(1)=10, null(1)=0, distinct(6)=10, null(6)=0]
 │    ├── fd: (9)-->(6,7), (1)==(6), (6)==(1)
 │    ├── scan small
 │    │    ├── columns: m:1(int) n:2(int)
 │    │    └── stats: [rows=10, distinct(1)=10, null(1)=0]
 │    └── filters (true)
 └── filters
      └── c:8 > 2 [type=bool, outer=(8), constraints=(/8: [/3 - ]; tight)]

# Multiple equalities. Both a and b are in the secondary index, so the lookup
# into abcd_a_b_idx uses two key columns ([1 2] = [6 7]). The filter on c is
# again applied by the top-level lookup join into the primary index.
opt
SELECT * FROM small JOIN abcd ON a=m AND b=n WHERE c>2
----
inner-join (lookup abcd)
 ├── columns: m:1(int!null) n:2(int!null) a:6(int!null) b:7(int!null) c:8(int!null)
 ├── key columns: [9] = [9]
 ├── lookup columns are key
 ├── stats: [rows=0.3349493, distinct(1)=0.334949, null(1)=0, distinct(2)=1e-10, null(2)=0, distinct(6)=0.334949, null(6)=0, distinct(7)=1e-10, null(7)=0]
 ├── fd: (1)==(6), (6)==(1), (2)==(7), (7)==(2)
 ├── inner-join (lookup abcd@abcd_a_b_idx)
 │    ├── columns: m:1(int!null) n:2(int!null) a:6(int!null) b:7(int!null) abcd.rowid:9(int!null)
 │    ├── key columns: [1 2] = [6 7]
 │    ├── stats: [rows=0.970299, distinct(1)=0.970299, null(1)=0, distinct(2)=1e-10, null(2)=0, distinct(6)=0.970299, null(6)=0, distinct(7)=1e-10, null(7)=0]
 │    ├── fd: (9)-->(6,7), (1)==(6), (6)==(1), (2)==(7), (7)==(2)
 │    ├── scan small
 │    │    ├── columns: m:1(int) n:2(int)
 │    │    └── stats: [rows=10, distinct(1)=10, null(1)=0, distinct(2)=1, null(2)=0.1]
 │    └── filters (true)
 └── filters
      └── c:8 > 2 [type=bool, outer=(8), constraints=(/8: [/3 - ]; tight)]

# Schema for the second group of tests. abc has the composite primary key
# (a, c).
exec-ddl
CREATE TABLE abc (a INT, b INT, c INT, PRIMARY KEY (a, c))
----

# def has the composite primary key (f, e) and two secondary indexes: e_idx on
# e (storing d) and d_idx on d. Column g is not stored in either secondary
# index.
exec-ddl
CREATE TABLE def (d INT, e INT, f INT, g FLOAT, PRIMARY KEY (f, e), INDEX e_idx (e) STORING (d), INDEX d_idx (d))
----

# Set up the statistics as if the first table is much smaller than the second.
# abc: 100 rows, 100 distinct values of a.
exec-ddl
ALTER TABLE abc INJECT STATISTICS '[
  {
    "columns": ["a"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 100,
    "distinct_count": 100,
    "avg_size": 2
  }
]'
----

# def: 10000 rows, with 1000 distinct values of d, 100 distinct values of e,
# and 10000 distinct values of f. An equality on f is therefore far more
# selective than one on e.
exec-ddl
ALTER TABLE def INJECT STATISTICS '[
  {
    "columns": ["d"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 1000,
    "avg_size": 5
  },
  {
    "columns": ["e"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 100,
    "avg_size": 6
  },
  {
    "columns": ["f"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10000,
    "distinct_count": 10000,
    "avg_size": 7
  }
]'
----

# The filter a=f is selective, so we expect a lookup join. f is the leading
# primary key column of def and every selected column is available from the
# primary index, so a single lookup join into def is enough. The join is
# estimated at 100 rows.
opt
SELECT a, b, c, d, e, f FROM abc JOIN def ON a = f
----
inner-join (lookup def)
 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:6(int) e:7(int!null) f:8(int!null)
 ├── key columns: [1] = [8]
 ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(8)=100, null(8)=0]
 ├── key: (3,7,8)
 ├── fd: (1,3)-->(2), (7,8)-->(6), (1)==(8), (8)==(1)
 ├── scan abc
 │    ├── columns: a:1(int!null) b:2(int) c:3(int!null)
 │    ├── stats: [rows=100, distinct(1)=100, null(1)=0]
 │    ├── key: (1,3)
 │    └── fd: (1,3)-->(2)
 └── filters (true)

# The filter a=e is not very selective, so we do not expect a lookup join.
# e has only 100 distinct values over 10000 rows and the join is estimated at
# 10000 rows. The expected plan is a merge join of def@e_idx (ordered on e)
# with abc (ordered on a).
opt
SELECT a, b, c, d, e, f FROM abc JOIN def ON a = e
----
inner-join (merge)
 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:6(int) e:7(int!null) f:8(int!null)
 ├── left ordering: +7
 ├── right ordering: +1
 ├── stats: [rows=10000, distinct(1)=100, null(1)=0, distinct(7)=100, null(7)=0]
 ├── key: (3,7,8)
 ├── fd: (1,3)-->(2), (7,8)-->(6), (1)==(7), (7)==(1)
 ├── scan def@e_idx
 │    ├── columns: d:6(int) e:7(int!null) f:8(int!null)
 │    ├── stats: [rows=10000, distinct(7)=100, null(7)=0]
 │    ├── key: (7,8)
 │    ├── fd: (7,8)-->(6)
 │    └── ordering: +7
 ├── scan abc
 │    ├── columns: a:1(int!null) b:2(int) c:3(int!null)
 │    ├── stats: [rows=100, distinct(1)=100, null(1)=0]
 │    ├── key: (1,3)
 │    ├── fd: (1,3)-->(2)
 │    └── ordering: +1
 └── filters (true)

# Check column statistics for lookup join. Each colstat option requests
# statistics for a column (or column set); the requested entries then appear
# in the stats line of the root expression.
# NOTE(review): colstat=4 refers to a column that is not among the output
# columns listed below (1, 2, 3, 6, 7, 8); confirm whether colstat=6 (d) was
# intended.
opt colstat=1 colstat=2 colstat=3 colstat=4 colstat=7 colstat=8 colstat=(2,7,8)
SELECT a, b, c, d, e, f FROM abc JOIN DEF ON a = f
----
inner-join (lookup def)
 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:6(int) e:7(int!null) f:8(int!null)
 ├── key columns: [1] = [8]
 ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=9.99955, null(2)=1, distinct(3)=9.99955, null(3)=0, distinct(4)=1, null(4)=0, distinct(7)=63.2139, null(7)=0, distinct(8)=100, null(8)=0, distinct(2,7,8)=100, null(2,7,8)=0]
 ├── key: (3,7,8)
 ├── fd: (1,3)-->(2), (7,8)-->(6), (1)==(8), (8)==(1)
 ├── scan abc
 │    ├── columns: a:1(int!null) b:2(int) c:3(int!null)
 │    ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=1, distinct(3)=10, null(3)=0]
 │    ├── key: (1,3)
 │    └── fd: (1,3)-->(2)
 └── filters (true)

# Check column statistics for double lookup join. The inner left join looks up
# into d_idx, which does not provide g, so an outer lookup join into the
# primary index of def is needed to produce it. colstat=9 requests statistics
# for g, which therefore only appear on the outer join. The b = 3 condition
# stays as an ON filter of the inner left join.
opt colstat=9
SELECT * FROM abc LEFT JOIN DEF ON a = d AND b = 3
----
left-join (lookup def)
 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:6(int) e:7(int) f:8(int) g:9(float)
 ├── key columns: [8 7] = [8 7]
 ├── lookup columns are key
 ├── stats: [rows=100, distinct(6)=10, null(6)=90, distinct(9)=9.95022, null(9)=91]
 ├── key: (1,3,7,8)
 ├── fd: (1,3)-->(2), (7,8)-->(6,9)
 ├── left-join (lookup def@d_idx)
 │    ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:6(int) e:7(int) f:8(int)
 │    ├── key columns: [1] = [6]
 │    ├── stats: [rows=100, distinct(6)=10, null(6)=90]
 │    ├── key: (1,3,7,8)
 │    ├── fd: (1,3)-->(2), (7,8)-->(6)
 │    ├── scan abc
 │    │    ├── columns: a:1(int!null) b:2(int) c:3(int!null)
 │    │    ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=1]
 │    │    ├── key: (1,3)
 │    │    └── fd: (1,3)-->(2)
 │    └── filters
 │         └── b:2 = 3 [type=bool, outer=(2), constraints=(/2: [/3 - /3]; tight), fd=()-->(2)]
 └── filters (true)

# The filter a=e is not very selective, so we do not expect a lookup join, even
# though there is an additional filter. The expected plan is a hash join that
# scans def in full and keeps both a = e and b = 3 as join filters; it is
# shown as a right join because abc is the second input.
opt colstat=9
SELECT * FROM abc LEFT JOIN DEF ON a = e AND b = 3
----
right-join (hash)
 ├── columns: a:1(int!null) b:2(int) c:3(int!null) d:6(int) e:7(int) f:8(int) g:9(float)
 ├── stats: [rows=100, distinct(7)=100, null(7)=0, distinct(9)=95.1671, null(9)=1]
 ├── key: (1,3,7,8)
 ├── fd: (1,3)-->(2), (7,8)-->(6,9)
 ├── scan def
 │    ├── columns: d:6(int) e:7(int!null) f:8(int!null) g:9(float)
 │    ├── stats: [rows=10000, distinct(7)=100, null(7)=0, distinct(9)=1000, null(9)=100]
 │    ├── key: (7,8)
 │    └── fd: (7,8)-->(6,9)
 ├── scan abc
 │    ├── columns: a:1(int!null) b:2(int) c:3(int!null)
 │    ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=10, null(2)=1]
 │    ├── key: (1,3)
 │    └── fd: (1,3)-->(2)
 └── filters
      ├── a:1 = e:7 [type=bool, outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ]), fd=(1)==(7), (7)==(1)]
      └── b:2 = 3 [type=bool, outer=(2), constraints=(/2: [/3 - /3]; tight), fd=()-->(2)]

# Schema for the index-selection tests. t and u have identical definitions,
# each with three candidate indexes: x_idx on x (storing y), y_idx on y
# (storing x), and xy_idx on (x, y).
exec-ddl
CREATE TABLE t (x INT, y INT, INDEX x_idx (x) STORING (y), INDEX y_idx (y) STORING (x), INDEX xy_idx (x, y))
----

exec-ddl
CREATE TABLE u (x INT, y INT, INDEX x_idx (x) STORING (y), INDEX y_idx (y) STORING (x), INDEX xy_idx (x, y))
----

# t: 1000 rows, with 10 distinct values each for x and y.
exec-ddl
ALTER TABLE t INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 10,
    "avg_size": 2
  },
  {
    "columns": ["y"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 10,
    "avg_size": 3
  }
]'
----

# u: 10 rows, with 2 distinct values each for x and y, so u is much smaller
# than t.
exec-ddl
ALTER TABLE u INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10,
    "distinct_count": 2,
    "avg_size": 1
  },
  {
    "columns": ["y"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 10,
    "distinct_count": 2,
    "avg_size": 5
  }
]'
----

# Test that the correct index is used for the lookup join. With equalities on
# both x and y, the expected choice is xy_idx, which lets both columns serve as
# lookup key columns ([1 2] = [6 7]), rather than x_idx or y_idx.
opt
SELECT * FROM u WHERE EXISTS (SELECT * FROM t WHERE u.x=t.x AND u.y=t.y);
----
semi-join (lookup t@xy_idx)
 ├── columns: x:1(int) y:2(int)
 ├── key columns: [1 2] = [6 7]
 ├── stats: [rows=10, distinct(1)=2, null(1)=0, distinct(2)=2, null(2)=0]
 ├── scan u
 │    ├── columns: u.x:1(int) u.y:2(int)
 │    └── stats: [rows=10, distinct(1)=2, null(1)=0, distinct(2)=2, null(2)=0]
 └── filters (true)

# Anti-join counterpart of the semi-join test above: NOT EXISTS is planned as
# an anti-join that looks up into the same index (xy_idx) with the same two
# key columns.
opt
SELECT * FROM u WHERE NOT EXISTS (SELECT * FROM t WHERE u.x=t.x AND u.y=t.y);
----
anti-join (lookup t@xy_idx)
 ├── columns: x:1(int) y:2(int)
 ├── key columns: [1 2] = [6 7]
 ├── stats: [rows=1e-10]
 ├── scan u
 │    ├── columns: u.x:1(int) u.y:2(int)
 │    └── stats: [rows=10, distinct(1)=2, null(1)=0, distinct(2)=2, null(2)=0]
 └── filters (true)


# Schema for the constant-column lookup join tests. medium is the join input.
exec-ddl
CREATE TABLE medium (m INT, n INT)
----

# medium: 40 rows, 40 distinct values of m.
exec-ddl
ALTER TABLE medium INJECT STATISTICS '[
  {
    "columns": ["m"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 40,
    "distinct_count": 40,
    "avg_size": 2
  }
]'
----

# wxyz has a secondary index on (x, y, z); w is not part of that index.
exec-ddl
CREATE TABLE wxyz (w INT, x INT, y INT, z INT, INDEX (x,y,z))
----

# Histogram on y in which the value 10 is rare: the bucket with upper bound 10
# has num_eq=50, i.e. 50 of the 1000 rows have y = 10, while 900 rows fall
# inside the bucket's range.
exec-ddl
ALTER TABLE wxyz INJECT STATISTICS '[
  {
    "columns": ["y"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 11,
    "avg_size": 3,
    "histo_col_type": "int",
    "histo_buckets": [
      {"num_eq": 50, "num_range": 0, "distinct_range": 0, "upper_bound": "0"},
      {"num_eq": 50, "num_range": 900, "distinct_range": 9, "upper_bound": "10"}
    ]
  }
]'
----

# Choose the lookup join due to the highly selective constant column. The
# constant from y = 10 is projected onto the input as column 13
# ("lookup_join_const_col_@8") so that both x and y can be used as lookup key
# columns ([1 13] = [7 8]) on wxyz_x_y_z_idx. A second lookup join into the
# primary index then fetches w.
opt
SELECT * FROM medium INNER JOIN wxyz ON m=x AND y=10
----
inner-join (lookup wxyz)
 ├── columns: m:1(int!null) n:2(int) w:6(int) x:7(int!null) y:8(int!null) z:9(int)
 ├── key columns: [10] = [10]
 ├── lookup columns are key
 ├── stats: [rows=49.34419, distinct(1)=39.1263, null(1)=0, distinct(7)=39.1263, null(7)=0]
 ├── fd: ()-->(8), (1)==(7), (7)==(1)
 ├── inner-join (lookup wxyz@wxyz_x_y_z_idx)
 │    ├── columns: m:1(int!null) n:2(int) x:7(int!null) y:8(int!null) z:9(int) wxyz.rowid:10(int!null)
 │    ├── key columns: [1 13] = [7 8]
 │    ├── stats: [rows=19.8, distinct(1)=19.8, null(1)=0, distinct(7)=19.8, null(7)=0, distinct(8)=1, null(8)=0, distinct(13)=1, null(13)=0]
 │    ├── fd: ()-->(8), (10)-->(7,9), (1)==(7), (7)==(1)
 │    ├── project
 │    │    ├── columns: "lookup_join_const_col_@8":13(int!null) m:1(int) n:2(int)
 │    │    ├── stats: [rows=40, distinct(1)=40, null(1)=0, distinct(13)=1, null(13)=0]
 │    │    ├── fd: ()-->(13)
 │    │    ├── scan medium
 │    │    │    ├── columns: m:1(int) n:2(int)
 │    │    │    └── stats: [rows=40, distinct(1)=40, null(1)=0]
 │    │    └── projections
 │    │         └── 10 [as="lookup_join_const_col_@8":13, type=int]
 │    └── filters (true)
 └── filters (true)

# Replace the histogram on y so that the value 10 is now common: the bucket
# with upper bound 10 has num_eq=900, i.e. 900 of the 1000 rows have y = 10.
exec-ddl
ALTER TABLE wxyz INJECT STATISTICS '[
  {
    "columns": ["y"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 1000,
    "distinct_count": 11,
    "avg_size": 3,
    "histo_col_type": "int",
    "histo_buckets": [
      {"num_eq": 10, "num_range": 0, "distinct_range": 0, "upper_bound": "0"},
      {"num_eq": 900, "num_range": 90, "distinct_range": 9, "upper_bound": "10"}
    ]
  }
]'
----

# With a less selective constant column, the hash join should be chosen instead.
# Same query as the previous test, but y = 10 now matches an estimated 900 rows
# of wxyz, so the expected plan filters wxyz with a select and hash-joins the
# result with medium on m = x.
opt
SELECT * FROM medium INNER JOIN wxyz ON m=x AND y=10
----
inner-join (hash)
 ├── columns: m:1(int!null) n:2(int) w:6(int) x:7(int!null) y:8(int!null) z:9(int)
 ├── stats: [rows=356.4, distinct(1)=40, null(1)=0, distinct(7)=40, null(7)=0]
 ├── fd: ()-->(8), (1)==(7), (7)==(1)
 ├── select
 │    ├── columns: w:6(int) x:7(int) y:8(int!null) z:9(int)
 │    ├── stats: [rows=900, distinct(7)=100, null(7)=9, distinct(8)=1, null(8)=0]
 │    │   histogram(8)=  0 900
 │    │                <--- 10
 │    ├── fd: ()-->(8)
 │    ├── scan wxyz
 │    │    ├── columns: w:6(int) x:7(int) y:8(int) z:9(int)
 │    │    └── stats: [rows=1000, distinct(7)=100, null(7)=10, distinct(8)=11, null(8)=0]
 │    │        histogram(8)=  0 10  90 900
 │    │                     <--- 0 ---- 10
 │    └── filters
 │         └── y:8 = 10 [type=bool, outer=(8), constraints=(/8: [/10 - /10]; tight), fd=()-->(8)]
 ├── scan medium
 │    ├── columns: m:1(int) n:2(int)
 │    └── stats: [rows=40, distinct(1)=40, null(1)=0]
 └── filters
      └── m:1 = x:7 [type=bool, outer=(1,7), constraints=(/1: (/NULL - ]; /7: (/NULL - ]), fd=(1)==(7), (7)==(1)]
