# Test table for the index-join statistics tests in this file. x is the primary
# key; the UNIQUE (s DESC, d) constraint creates the secondary index that the
# plans below scan as a@a_s_d_key.
exec-ddl
CREATE TABLE a (x INT PRIMARY KEY, y INT, s STRING, d DECIMAL NOT NULL, UNIQUE (s DESC, d))
----

# Initial table statistics: 2000 rows, with no null_count given for any column
# set. x and (x, y) have 2000 distinct values, s has 20, and y has 100.
exec-ddl
ALTER TABLE a INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 1:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 2000,
    "avg_size": 2
  },
  {
    "columns": ["x","y"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 2000
  },
  {
    "columns": ["s"],
    "created_at": "2018-01-01 1:30:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 20,
    "avg_size": 9
  },
  {
    "columns": ["y"],
    "created_at": "2018-01-01 1:40:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 100,
    "avg_size": 3
  }
]'
----

# In order to actually create new logical props for the index join, we
# need to call ConstructIndexJoin, which only happens when there is a
# remaining filter.
# In the plan below, x + y = 10 is that remaining filter: it stays in a select
# above the index join, while s = 'foo' becomes the constraint on the
# a_s_d_key scan.
opt
SELECT count(*) FROM (SELECT * FROM a WHERE s = 'foo' AND x + y = 10) GROUP BY s, y
----
project
 ├── columns: count:7(int!null)
 ├── immutable
 ├── stats: [rows=28.54786]
 └── group-by (hash)
      ├── columns: y:2(int) count_rows:7(int!null)
      ├── grouping columns: y:2(int)
      ├── immutable
      ├── stats: [rows=28.54786, distinct(2)=28.5479, null(2)=0]
      ├── key: (2)
      ├── fd: (2)-->(7)
      ├── select
      │    ├── columns: x:1(int!null) y:2(int) s:3(string!null)
      │    ├── immutable
      │    ├── stats: [rows=33.33333, distinct(2)=28.5479, null(2)=0, distinct(3)=1, null(3)=0]
      │    ├── key: (1)
      │    ├── fd: ()-->(3), (1)-->(2)
      │    ├── index-join a
      │    │    ├── columns: x:1(int!null) y:2(int) s:3(string)
      │    │    ├── stats: [rows=100]
      │    │    ├── key: (1)
      │    │    ├── fd: ()-->(3), (1)-->(2)
      │    │    └── scan a@a_s_d_key
      │    │         ├── columns: x:1(int!null) s:3(string!null)
      │    │         ├── constraint: /-3/4: [/'foo' - /'foo']
      │    │         ├── stats: [rows=100, distinct(3)=1, null(3)=0]
      │    │         ├── key: (1)
      │    │         └── fd: ()-->(3)
      │    └── filters
      │         └── (x:1 + y:2) = 10 [type=bool, outer=(1,2), immutable]
      └── aggregations
           └── count-rows [as=count_rows:7, type=int]

# Same filters as the previous test, but selecting all columns and requesting
# column statistics (colstat) for columns 1-4 (x, y, s, d) and for the column
# set (1,2,3). The requested statistics appear on the root select.
opt colstat=1 colstat=2 colstat=3 colstat=4 colstat=(1,2,3)
SELECT * FROM a WHERE s = 'foo' AND x + y = 10
----
select
 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
 ├── immutable
 ├── stats: [rows=33.33333, distinct(1)=33.3333, null(1)=0, distinct(2)=28.5479, null(2)=0, distinct(3)=1, null(3)=0, distinct(4)=30.9413, null(4)=0, distinct(1-3)=33.3333, null(1-3)=0]
 ├── key: (1)
 ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
 ├── index-join a
 │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
 │    ├── stats: [rows=100]
 │    ├── key: (1)
 │    ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1), (3,4)~~>(1,2)
 │    └── scan a@a_s_d_key
 │         ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
 │         ├── constraint: /-3/4: [/'foo' - /'foo']
 │         ├── stats: [rows=100, distinct(3)=1, null(3)=0]
 │         ├── key: (1)
 │         └── fd: ()-->(3), (1)-->(4), (4)-->(1)
 └── filters
      └── (x:1 + y:2) = 10 [type=bool, outer=(1,2), immutable]

# With only s = 'foo' there is no remaining filter, so the index join is the
# root of the plan and reports the requested column statistics itself.
opt colstat=1 colstat=2 colstat=3 colstat=(1,2,3)
SELECT * FROM a WHERE s = 'foo'
----
index-join a
 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
 ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=64.1514, null(2)=0, distinct(3)=1, null(3)=0, distinct(1-3)=100, null(1-3)=0]
 ├── key: (1)
 ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
 └── scan a@a_s_d_key
      ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
      ├── constraint: /-3/4: [/'foo' - /'foo']
      ├── stats: [rows=100, distinct(3)=1, null(3)=0]
      ├── key: (1)
      └── fd: ()-->(3), (1)-->(4), (4)-->(1)

# Note that the index join's row count is carried over from the normalized
# SELECT expression in its memo group (see next test case).
# NOTE(review): this comment previously stated that the index join's row count
# does not match the scan's row count, and that fixing it requires more
# precise constraint calculation for filters. In the expected output below
# both row counts are 200, so that statement looks out of date - confirm and
# drop it if obsolete.
opt colstat=1 colstat=2 colstat=3 colstat=(2,3) colstat=(1,2,3)
SELECT * FROM a WHERE s = 'foo' OR s = 'bar'
----
index-join a
 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
 ├── stats: [rows=200, distinct(1)=200, null(1)=0, distinct(2)=87.8423, null(2)=0, distinct(3)=2, null(3)=0, distinct(2,3)=175.685, null(2,3)=0, distinct(1-3)=200, null(1-3)=0]
 ├── key: (1)
 ├── fd: (1)-->(2-4), (3,4)-->(1,2)
 └── scan a@a_s_d_key
      ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
      ├── constraint: /-3/4
      │    ├── [/'foo' - /'foo']
      │    └── [/'bar' - /'bar']
      ├── stats: [rows=200, distinct(3)=2, null(3)=0]
      ├── key: (1)
      └── fd: (1)-->(3,4), (3,4)-->(1)

# Normalized (norm) form of the previous query: a select over an unconstrained
# scan of a. The OR'd filter yields a tight constraint on s, and the select's
# row count is 200.
norm
SELECT * FROM a WHERE s = 'foo' OR s = 'bar'
----
select
 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
 ├── stats: [rows=200, distinct(3)=2, null(3)=0]
 ├── key: (1)
 ├── fd: (1)-->(2-4), (3,4)-->(1,2)
 ├── scan a
 │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
 │    ├── stats: [rows=2000, distinct(1)=2000, null(1)=0, distinct(3)=20, null(3)=0, distinct(4)=200, null(4)=0]
 │    ├── key: (1)
 │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 └── filters
      └── (s:3 = 'foo') OR (s:3 = 'bar') [type=bool, outer=(3), constraints=(/3: [/'bar' - /'bar'] [/'foo' - /'foo']; tight)]

# Bump up null counts: re-inject statistics with a null_count of 1000 (out of
# 2000 rows) for (x, y), s, and y. The distinct counts for s and y also change
# (s: 20 -> 11, y: 100 -> 101). The tests below re-run the earlier queries
# against these statistics.
exec-ddl
ALTER TABLE a INJECT STATISTICS '[
  {
    "columns": ["x"],
    "created_at": "2018-01-01 2:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 2000,
    "avg_size": 2
  },
  {
    "columns": ["x","y"],
    "created_at": "2018-01-01 2:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 2000,
    "null_count": 1000
  },
  {
    "columns": ["s"],
    "created_at": "2018-01-01 2:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 11,
    "null_count": 1000,
    "avg_size": 9
  },
  {
    "columns": ["y"],
    "created_at": "2018-01-01 2:00:00.00000+00:00",
    "row_count": 2000,
    "distinct_count": 101,
    "null_count": 1000,
    "avg_size": 3
  }
]'
----

# Same query and colstat requests as the earlier s = 'foo' AND x + y = 10 test,
# now against the statistics with null counts: the root select reports a
# nonzero null count for y (null(2)).
opt colstat=1 colstat=2 colstat=3 colstat=4 colstat=(1,2,3)
SELECT * FROM a WHERE s = 'foo' AND x + y = 10
----
select
 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
 ├── immutable
 ├── stats: [rows=33.33333, distinct(1)=33.3333, null(1)=0, distinct(2)=28.5928, null(2)=16.6667, distinct(3)=1, null(3)=0, distinct(4)=30.9413, null(4)=0, distinct(1-3)=33.3333, null(1-3)=0]
 ├── key: (1)
 ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
 ├── index-join a
 │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
 │    ├── stats: [rows=100]
 │    ├── key: (1)
 │    ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1), (3,4)~~>(1,2)
 │    └── scan a@a_s_d_key
 │         ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
 │         ├── constraint: /-3/4: [/'foo' - /'foo']
 │         ├── stats: [rows=100, distinct(3)=1, null(3)=0]
 │         ├── key: (1)
 │         └── fd: ()-->(3), (1)-->(4), (4)-->(1)
 └── filters
      └── (x:1 + y:2) = 10 [type=bool, outer=(1,2), immutable]

# Same query and colstat requests as the earlier s = 'foo' test, now against
# the statistics with null counts: the index join reports a nonzero null count
# for y (null(2)).
opt colstat=1 colstat=2 colstat=3 colstat=(1,2,3)
SELECT * FROM a WHERE s = 'foo'
----
index-join a
 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
 ├── stats: [rows=100, distinct(1)=100, null(1)=0, distinct(2)=64.4233, null(2)=50, distinct(3)=1, null(3)=0, distinct(1-3)=100, null(1-3)=0]
 ├── key: (1)
 ├── fd: ()-->(3), (1)-->(2,4), (4)-->(1,2)
 └── scan a@a_s_d_key
      ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
      ├── constraint: /-3/4: [/'foo' - /'foo']
      ├── stats: [rows=100, distinct(3)=1, null(3)=0]
      ├── key: (1)
      └── fd: ()-->(3), (1)-->(4), (4)-->(1)

# Note that the index join's row count is carried over from the normalized
# SELECT expression in its memo group (see next test case).
# NOTE(review): this comment previously stated that the index join's row count
# does not match the scan's row count, and that fixing it requires more
# precise constraint calculation for filters. In the expected output below
# both row counts are 200, so that statement looks out of date - confirm and
# drop it if obsolete.
# Also note that an "s IS NOT NULL" clause is tacked on to make s a
# non-null column in the logical properties; the stated reason is that the
# constraint builder at the SELECT level cannot deduce whether an OR'd filter
# is null-rejecting or not.
# NOTE(review): the variant of this query without the IS NOT NULL clause
# (earlier in this file) already reports s:3 as string!null, so the extra
# clause may no longer be necessary - confirm.
opt colstat=1 colstat=2 colstat=3 colstat=(2,3) colstat=(1,2,3)
SELECT * FROM a WHERE (s = 'foo' OR s = 'bar') AND s IS NOT NULL
----
index-join a
 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
 ├── stats: [rows=200, distinct(1)=200, null(1)=0, distinct(2)=88.4619, null(2)=100, distinct(3)=2, null(3)=0, distinct(2,3)=176.924, null(2,3)=0, distinct(1-3)=200, null(1-3)=0]
 ├── key: (1)
 ├── fd: (1)-->(2-4), (3,4)-->(1,2)
 └── scan a@a_s_d_key
      ├── columns: x:1(int!null) s:3(string!null) d:4(decimal!null)
      ├── constraint: /-3/4
      │    ├── [/'foo' - /'foo']
      │    └── [/'bar' - /'bar']
      ├── stats: [rows=200, distinct(3)=2, null(3)=0]
      ├── key: (1)
      └── fd: (1)-->(3,4), (3,4)-->(1)

# Normalized (norm) form of the previous query, against the statistics with
# null counts: the scan reports null(3)=1000 for s, and the select's row count
# is 200 with null(3)=0.
norm
SELECT * FROM a WHERE (s = 'foo' OR s = 'bar') AND s IS NOT NULL
----
select
 ├── columns: x:1(int!null) y:2(int) s:3(string!null) d:4(decimal!null)
 ├── stats: [rows=200, distinct(3)=2, null(3)=0]
 ├── key: (1)
 ├── fd: (1)-->(2-4), (3,4)-->(1,2)
 ├── scan a
 │    ├── columns: x:1(int!null) y:2(int) s:3(string) d:4(decimal!null)
 │    ├── stats: [rows=2000, distinct(1)=2000, null(1)=0, distinct(3)=11, null(3)=1000, distinct(4)=200, null(4)=0]
 │    ├── key: (1)
 │    └── fd: (1)-->(2-4), (3,4)~~>(1,2)
 └── filters
      └── ((s:3 = 'foo') OR (s:3 = 'bar')) AND (s:3 IS NOT NULL) [type=bool, outer=(3), constraints=(/3: [/'bar' - /'bar'] [/'foo' - /'foo']; tight)]
