select * from t1 where a = (select 1);=>select * from t1 where a = 1;
oe $cmp$ (SELECT ie FROM ... WHERE subq_where ... HAVING subq_having)=>- oe $cmp$ (SELECT MAX(...) ) // handled by Item_singlerow_subselect- oe $cmp$ \<max\>(SELECT ...) // handled by Item_maxmin_subselectfails=>Item_in_optimizer- 对于已经是materialized方案,不转换- 通过equi-join转换IN到EXISTS
SELECT * FROM t1 WHERE a < ANY (SELECT a FROM t1);=>SELECT * FROM t1 WHERE a < (SELECT MAX(a) FROM t1)
SELECT ... FROM t1 WHERE t1.b IN (SELECT <expr of SUM(t1.a)> FROM t2)=>SELECT ... FROM t1 WHERE t1.b IN (SELECT <expr of SUM(t1.a)> FROM t2[trigcond] HAVING t1.b=ref-to-<expr of SUM(t1.a)>)
不需要区分NULL和FALSE的子查询:SELECT 1 FROM ... WHERE (oe $cmp$ ie) AND subq_where需要区分的子查询:SELECT 1 FROM ...WHERE subq_where AND trigcond((oe $cmp$ ie) OR (ie IS NULL))HAVING trigcond(@<is_not_null_test@>(ie))
for (each left operand)create the equi-join conditionif (is_having_used || !abort_on_null)create the "is null" and is_not_null_test itemsif (is_having_used)add the equi-join and the null tests to HAVINGelseadd the equi-join and the "is null" to WHEREadd the is_not_null_test to HAVING
(l1, l2, l3) IN (SELECT v1, v2, v3 ... WHERE where) =>EXISTS (SELECT ... WHERE where and(l1 = v1 or is null v1) and(l2 = v2 or is null v2) and(l3 = v3 or is null v3)[ HAVING is_not_null_test(v1) andis_not_null_test(v2) andis_not_null_test(v3)) ] <-- 保证不为NULL可以去掉HAVING
(l1, l2, l3) IN (SELECT v1, v2, v3 ... HAVING having) =>EXISTS (SELECT ... HAVING having and(l1 = v1 or is null v1) and(l2 = v2 or is null v2) and(l3 = v3 or is null v3) andis_not_null_test(v1) andis_not_null_test(v2) andis_not_null_test(v3))
root:test> set optimizer_switch = 'subquery_to_derived=off';Query OK, 0 rows affected (0.00 sec)root:test> EXPLAIN SELECT b, MAX(a) AS ma FROM t4 GROUP BY b HAVING ma < (SELECT MAX(t2.a) FROM t2 WHERE t2.b=t4.b);+----+--------------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------+| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |+----+--------------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------+| 1 | PRIMARY | t4 | NULL | ALL | NULL | NULL | NULL | NULL | 10 | 100.00 | Using temporary || 2 | DEPENDENT SUBQUERY | t2 | NULL | ALL | NULL | NULL | NULL | NULL | 3 | 33.33 | Using where |+----+--------------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------+2 rows in set, 3 warnings (0.00 sec)root:test> set optimizer_switch = 'subquery_to_derived=on';Query OK, 0 rows affected (0.00 sec)root:test> EXPLAIN SELECT b, MAX(a) AS ma FROM t4 GROUP BY b HAVING ma < (SELECT MAX(t2.a) FROM t2 WHERE t2.b=t4.b);+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+| 1 | PRIMARY | t4 | NULL | ALL | NULL | NULL | NULL | NULL | 10 | 100.00 | Using temporary || 1 | PRIMARY | <derived2> | NULL | ALL | NULL | NULL | NULL | NULL | 3 | 100.00 | Using where; Using join buffer (hash join) || 2 | DERIVED | t2 | NULL | ALL | NULL | NULL | NULL | NULL | 3 | 100.00 | Using temporary |+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+3 rows in set, 3 warnings (0.01 sec)
SELECT SUM(c1), (SELECT SUM(c1) FROM t3) scalar FROM t1;转换为=>SELECT derived0.summ, derived1.scalarFROM (SELECT SUM(a) AS summ FROM t1) AS derived0LEFT JOIN(SELECT SUM(b) AS scalar FROM t3) AS derived1ON TRUE执行计划如下:explain SELECT SUM(a), (SELECT SUM(c1) FROM t3) scalar FROM t1;+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+| 1 | PRIMARY | <derived3> | NULL | ALL | NULL | NULL | NULL | NULL | 1 | 100.00 | NULL || 1 | PRIMARY | <derived4> | NULL | ALL | NULL | NULL | NULL | NULL | 1 | 100.00 | Using where; Using join buffer (hash join) || 4 | DERIVED | t3 | NULL | ALL | NULL | NULL | NULL | NULL | 1 | 100.00 | NULL || 3 | DERIVED | t1 | NULL | ALL | NULL | NULL | NULL | NULL | 2 | 100.00 | NULL |+----+-------------+------------+------------+------+---------------+------+---------+------+------+----------+--------------------------------------------+
SELECT SUM(a), (SELECT SUM(b) FROM t3) scalarFROM t1HAVING SUM(a) > scalar;=>SELECT derived0.summ, derived1.scalarFROM (SELECT SUM(a) AS summ FROM t1) AS derived0LEFT JOINSUM(b) AS scalar FROM t3) AS derived1ON TRUEWHERE derived0.sum > derived1.scalar;
subq_item->sj_convert_priority =(((dependent * MAX_TABLES_FOR_SIZE) + // dependent subqueries firstchild_query_block->leaf_table_count) *65536) + // then with many tables(65536 - subq_no); // then based on position
for SELECT#1 WHERE X IN (SELECT #2 WHERE Y IN (SELECT#3)) :Query_block::prepare() (select#1)-> fix_fields() on IN condition-> Query_block::prepare() on subquery (select#2)-> fix_fields() on IN condition-> Query_block::prepare() on subquery (select#3)<- Query_block::prepare()<- fix_fields()-> flatten_subqueries: merge #3 in #2<- flatten_subqueries<- Query_block::prepare()<- fix_fields()-> flatten_subqueries: merge #2 in #1
FROM [tables] WHERE ... AND/OR oe IN (SELECT ie FROM it) ...=>FROM (tables) LEFT JOIN (SELECT DISTINCT ie FROM it) AS derivedON oe = derived.ie WHERE ... AND/OR derived.ie IS NOT NULL ...
SELECT ...FROM ot1 ... otNWHERE (oe1, ... oeM) IN (SELECT ie1, ..., ieMFROM it1 ... itK[WHERE inner-cond])[AND outer-cond][GROUP BY ...] [HAVING ...] [ORDER BY ...]=>SELECT ...FROM (ot1 ... otN) SJ (it1 ... itK)ON (oe1, ... oeM) = (ie1, ..., ieM)[AND inner-cond][WHERE outer-cond][GROUP BY ...] [HAVING ...] [ORDER BY ...]
SELECT ...FROM ot1 ... otNWHERE EXISTS (SELECT expressionsFROM it1 ... itK[WHERE inner-cond])[AND outer-cond][GROUP BY ...] [HAVING ...] [ORDER BY ...]=>SELECT ...FROM (ot1 ... otN) SJ (it1 ... itK)[ON inner-cond][WHERE outer-cond][GROUP BY ...] [HAVING ...] [ORDER BY ...]
SELECT ...FROM ot1 ... otNWHERE NOT EXISTS (SELECT expressionsFROM it1 ... itK[WHERE inner-cond])[AND outer-cond][GROUP BY ...] [HAVING ...] [ORDER BY ...]=>SELECT ...FROM (ot1 ... otN) AJ (it1 ... itK)[ON inner-cond][WHERE outer-cond AND is-null-cond(it1)][GROUP BY ...] [HAVING ...] [ORDER BY ...]
SELECT ...FROM ot1 ... otNWHERE (oe1, ... oeM) NOT IN (SELECT ie1, ..., ieMFROM it1 ... itK[WHERE inner-cond])[AND outer-cond][GROUP BY ...] [HAVING ...] [ORDER BY ...]=>SELECT ...FROM (ot1 ... otN) AJ (it1 ... itK)ON (oe1, ... oeM) = (ie1, ..., ieM)[AND inner-cond][WHERE outer-cond][GROUP BY ...] [HAVING ...] [ORDER BY ...]
SELECT @@optimizer_switch\G*************************** 1. row ***************************@@optimizer_switch: ......materialization=on,semijoin=on,loosescan=on,firstmatch=on,subquery_materialization_cost_based=on,......
SELECT * FROM t1 WHERE t1.a in (SELECT t2.c1 FROM t2 where t2.c1 > 0);=>/* select#1 */SELECT `t1`.`a` AS `a`FROM `t1`SEMI JOIN (`t2`)WHERE ((`t1`.`a` = `t2`.`c1`) and (`t2`.`c1` > 0))执行计划如下:explain SELECT * FROM t1 WHERE t1.a in (SELECT t2.c1 FROM t2 where t2.c1 > 0);+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------------------------------------------------+| id | select_type | table | partitions | type | possible_keys | key | key_len | ref | rows | filtered | Extra |+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------------------------------------------------+| 1 | SIMPLE | t2 | NULL | ALL | NULL | NULL | NULL | NULL | 1 | 100.00 | Using where; Start temporary || 1 | SIMPLE | t1 | NULL | ALL | NULL | NULL | NULL | NULL | 2 | 50.00 | Using where; End temporary; Using join buffer (hash join) |+----+-------------+-------+------------+------+---------------+------+---------+------+------+----------+-----------------------------------------------------------+
struct TABLE {......partition_info *part_info{nullptr}; /* Partition related information *//* If true, all partitions have been pruned away */bool all_partitions_pruned_away{false};......}SQL tranformation phaseSELECT_LEX::apply_local_transforms--> prune_partitionsfor example, select * from employee where company_id = 1000 ;SQL optimizer phaseJOIN::prune_table_partitions--> prune_partitions------> based on tbl->join_cond_optim() or JOIN::where_condfor example, explain select * from employee where company_id = (select c1 from t1);
root:ref> CREATE TABLE R2 (-> a INT,-> d INT-> ) PARTITION BY RANGE(a) (-> PARTITION p20 VALUES LESS THAN (20),-> PARTITION p40 VALUES LESS THAN (40),-> PARTITION p60 VALUES LESS THAN (60),-> PARTITION p80 VALUES LESS THAN (80),-> PARTITION p100 VALUES LESS THAN MAXVALUE-> );Query OK, 0 rows affected (0.09 sec)root:ref> Select * From R2 where a > 40 and a < 80;
(start)| $| Partitioning keyparts $ subpartitioning keyparts| $| ... ... $| | | $| +---------+ +---------+ $ +-----------+ +-----------+\-| par1=c1 |--| par2=c2 |-----| subpar1=c3|--| subpar2=c5|+---------+ +---------+ $ +-----------+ +-----------+| $ | || $ | +-----------+| $ | | subpar2=c6|| $ | +-----------+| $ || $ +-----------+ +-----------+| $ | subpar1=c4|--| subpar2=c8|| $ +-----------+ +-----------+| $| $+---------+ $ +------------+ +------------+| par1=c2 |------------------| subpar1=c10|--| subpar2=c12|+---------+ $ +------------+ +------------+| $... $例如第一行(par1=c1 and par2=c2 and subpar1=c3 and subpar2=c5)的遍历的stack将是:in find_used_partitions(key_tree = "subpar2=c5") (***)in find_used_partitions(key_tree = "subpar1=c3")in find_used_partitions(key_tree = "par2=c2") (**)in find_used_partitions(key_tree = "par1=c1")in prune_partitions(...)然后是继续下面的条件,以此类推or(par1=c1 and par2=c2 and subpar1=c3 and subpar2=c6)or(par1=c1 and par2=c2 and subpar1=c4 and subpar2=c8)or(par1=c2 and subpar1=c10 and subpar2=c12)
root:test> set optimizer_switch = 'derived_merge=off'; // 关闭dervied_merge 测试下推能力Query OK, 0 rows affected (0.00 sec)root:test> EXPLAIN FORMAT=tree SELECT * FROM (SELECT c1,c2 FROM t1) as dt WHERE c1 > 10;+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+| EXPLAIN |+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+| -> Table scan on dt (cost=2.51..2.51 rows=1)-> Materialize (cost=2.96..2.96 rows=1)-> Filter: (t1.c1 > 10) (cost=0.35 rows=1)-> Table scan on t1 (cost=0.35 rows=1)|+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
《WL#4389: Subquery optimizations: Make IN optimizations also handle EXISTS》
《WL#4245: Subquery optimization: Transform NOT EXISTS and NOT IN to anti-join》