表
CREATE TABLE `order_exp` ( `id` bigint(22) NOT NULL AUTO_INCREMENT COMMENT , `order_no` varchar(50) NOT NULL COMMENT , `order_note` varchar(100) NOT NULL COMMENT , `insert_time` datetime NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP COMMENT, `expire_duration` bigint(22) NOT NULL COMMENT , `expire_time` datetime NOT NULL COMMENT, `order_status` smallint(6) NOT NULL DEFAULT '0' COMMENT , PRIMARY KEY (`id`) USING BTREE, UNIQUE KEY `u_idx_day_status` (`insert_time`,`order_status`,`expire_time`) USING BTREE, KEY `idx_order_no` (`order_no`) USING BTREE, KEY `idx_expire_time` (`expire_time`) USING BTREE, KEY `ed` (`expire_duration`)) ENGINE=InnoDB AUTO_INCREMENT=10819 DEFAULT CHARSET=utf8 ROW_FORMAT=DYNAMIC;
范围查找 >、<
开启成本 SET optimizer_trace="enabled=on";
查询sql select * from order_exp where expire_duration >0 and expire_duration <100;
查询成本 SELECT * FROM information_schema.optimizer_trace;
计算成本"analyzing_range_alternatives": { "range_scan_alternatives": [ { "index": "ed", "ranges": [ "0 < expire_duration < 100" ], "index_dives_for_eq_ranges": true, "rowid_ordered": false, "using_mrr": false, "index_only": false, "rows": 376, "cost": 452.21, "chosen": true } ], "analyzing_roworder_intersect": { "usable": false, "cause": "too_few_roworder_scans" }},"chosen_range_access_summary": { "range_access_plan": { "type": "range_scan", "index": "ed", "rows": 376, "ranges": [ "0 < expire_duration < 100" ] }, "rows_for_plan": 376, "cost_for_plan": 452.21, "chosen": true}
expire_duration范围查找=1,用二级索引ed,通过explain select * from order_exp where expire_duration >0 and expire_duration <100;查找到rows=376
那么通过ed索引查找的成本:
IO成本=376*1.0376是需要回表的数量,1.0为回表的成本
CPU成本=376*0.2+0.01+376*0.2第一个(376*0.2)是在二级索引进行比较的成本.0.2是cpu比较成本.0.01微调值.(参考上一篇)
总成本=IO成本+CPU成本+范围=(376*1.0)+(376*0.2+0.01+376*0.2)+1)=527.41(在和全表扫描比较时候需要减去回表CPU成本)
成本=总成本-回表CPU成本=527.41-376*0.2=452.21
in执行成本
开启成本 SET optimizer_trace="enabled=on";
查询sql select * from order_exp where expire_duration in(0,1,2,3,... ... ... ,100);
查询成本 SELECT * FROM information_schema.optimizer_trace;
"range_scan_alternatives": [ { "index": "ed", "ranges": [ "1 <= expire_duration <= 1", "2 <= expire_duration <= 2", "3 <= expire_duration <= 3", ... "99 <= expire_duration <= 99", "100 <= expire_duration <= 100" ],"index_dives_for_eq_ranges": true,"rowid_ordered": false,"using_mrr": false,"index_only": false,"rows": 449,"cost": 638.81,"chosen": true}]
(like字段同样,例如name like 's%',范围:'s\u0000…'<name<'s\XEF…').
in里边每一项都会转换为范围,in后边的范围是100项.范围查找=100
通过Exceplain查出rows=449
那么通过ed索引查找的成本:
IO成本:449*1.0449:rows,1.0:IO固定值
CPU成本:449*0.2+0.01+449 *0.2
总成本=IO成本+CPU成本+范围=(449*1.0)+(449*0.2+0.01+449*0.2)+100)=728.61(在和全表扫描时候需要减去回表CPU成本)
成本=总成本-回表CPU成本=728.61-449*0.2=638.81
index dive
在范围查找中,例如 1 <= expire_duration <= 100mysql会先确定1的位置,然后顺序读到100,当读取的叶子节点数量<10的时候,是精确数,当叶子节点>10的时候,会根据前10叶的平均数乘以叶子数.
通过sql语句查询show variables like '%dive%';
image-20230202140632956
查看索引的统计数据SHOW INDEX FROM order_exp;Cardinality为基数,不重复的数,这是大概统计数量,不精确.PRIMARY是主键不重复数量就是表里数据总数.
image-20230202160830036
当in里边的个数<index_dive_limit,通过精确查找,当大于200利用统计数量取估算rows.单个值的不重复数 =ed Cardinality/Primary Cardinality;rows = 范围个数*单个值不重复数
改sql为:`select * from order_exp where expire_duration in(0,1,2,3,... ... ... ,300)
需要修改size大小,要不然显示不全set optimizer_trace_max_mem_size =81920;
"index": "ed","ranges": ["1 <= expire_duration <= 1","2 <= expire_duration <= 2","3 <= expire_duration <= 3","4 <= expire_duration <= 4","298 <= expire_duration <= 298","299 <= expire_duration <= 299","300 <= expire_duration <= 300" ] },"index_dives_for_eq_ranges": false,"rowid_ordered": false,"using_mrr": false,"index_only": false,"rows": 20700,"cost": 25140,"chosen": false,"cause": "cost"
估算rows=300*(10629/154)=20700;rows=20700(里边的除法取整)
IO成本20700*1.0
CPU成本20700*0.2+0.01+20700*0.2
总成本20700*1.0+20700*0.2+0.01+20700*0.2+300=29,280
减去回表CPU成本25,14.01-20700*0.2=25140
改sql,in里边改成不存在的数据,他依然会按照计算的rows去算.导致不能走索引,全表扫描
连接查询
连接查询,查询一次的表叫做驱动表,查询多次的表叫做被驱动表,驱动表查询出来的数据是扇出.成本计算=驱动表一次查询的成本+扇出数*被驱动表的成本.优化应该减少扇出数据,优化被驱动表sql.
内连接查询出来的数据是标准的笛卡尔积假设集合A={a, b},集合B={0, 1, 2},则两个集合的笛卡尔积为{(a, 0), (a, 1), (a, 2), (b, 0), (b, 1), (b, 2)}.总数 = num(A)*num(B)
CREATE TABLE `a1` ( `id` bigint(20) NOT NULL, `age` int(11) DEFAULT NULL, `name` varchar(255) DEFAULT NULL, PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;#创建三张a1,a2,a3.分别添加一百条数据
查询sql1select * from a1 INNER JOIN a2;总数一万条
explain select * from a1 INNER JOIN a2;a2为被驱动表
image-20230202210400023
查询sql2explain select * from a1 INNER JOIN a2 where a2.age>50;a1变成被驱动表了,因为a2扇出数量少.
image-20230202210935361
在INNER JOIN中会比较 AB或者BA成本(A做为驱动表或者B作为驱动表).看谁的成本低.当有三个表连接,会比较3!次(3的阶乘=6)次.当有四个表时候,4!会比较24次.所以一般建议连接不超过3
版权声明:内容来源于互联网和用户投稿 如有侵权请联系删除