基於tidbV6.0探索索引優化思路
作者:邊城元元
一、背景
在分散式資料庫中資料量級大都在千萬以上,用到的最多的sql類似 where ,order by ,limit 的語句。
where,order by的欄位能否同時走索引呢?帶著這個疑問基於 TiDBV6.0 做一個實驗驗證。
二、環境準備
2.1 建立TiDB cluster111
- 拓撲如下(cluster111-full.yaml)
global:
user: "tidb"
ssh_port: 22
deploy_dir: "/tidb-deploy"
data_dir: "/tidb-data"
# # Monitored variables are applied to all the machines.
monitored:
node_exporter_port: 9100
blackbox_exporter_port: 9115
server_configs:
tidb:
log.slow-threshold: 300
binlog.enable: false
binlog.ignore-error: false
tikv:
readpool.storage.use-unified-pool: false
readpool.coprocessor.use-unified-pool: true
pd:
schedule.leader-schedule-limit: 4
schedule.region-schedule-limit: 2048
schedule.replica-schedule-limit: 64
replication.location-labels:
- host
pd_servers:
- host: 10.0.2.15
# ssh_port: 22
# name: "pd-1"
client_port: 2379
# peer_port: 2380
tidb_servers:
- host: 10.0.2.15
tikv_servers:
- host: 10.0.2.15
# ssh_port: 22
port: 20160
status_port: 20180
config:
server.grpc-concurrency: 4
#server.labels: {host: "10.0.2.15.20160" }
monitoring_servers:
- host: 10.0.2.15
grafana_servers:
- host: 10.0.2.15
alertmanager_servers:
- host: 10.0.2.15
-
部署叢集
具體的部署可以參考文章 http://tidb.net/blog/af8080f7#TiDB-最小實踐Cluster111
# tiup cluster list
# tiup cluster stop cluster111
# tiup cluster destroy cluster111
# 部署cluster111叢集
tiup cluster deploy cluster111 ./cluster111-full.yaml --user root -p
tiup cluster start cluster111
2.2 建立庫表
CREATE DATABASE `b_crm` /*!40100 DEFAULT CHARACTER SET utf8mb4 */ ;
drop table if exists `m_test_sort`;
CREATE TABLE `m_test_sort` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`orgid` int(11) NOT NULL default 0,
`labelid` int not null default 0,
`catid` int not null default 0,
`productid` int not null default 0,
`p1` int not null default 0,
`p2` int not null default 0,
`p3` int not null default 0,
`name` varchar(20) not null default '',
`cust_id` char(30) not null default '',
PRIMARY KEY (`id`) ,
key ix_orgid(`orgid`),
key ix_labelid_catid(`labelid`,`catid`),
key ix_p1_p2_p3(`p1`,`p2`,`p3`),
UNIQUE KEY uix_cust_id(`cust_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
2.3 表資料初始化
-- 插入1條 seed
insert into m_test_sort(orgid,labelid,catid,productid,p1,p2,p3,name,cust_id)values(1,1,1,1,1,1,1,'111','111');
-- 執行多次14次
insert into m_test_sort(orgid,labelid,catid,productid,p1,p2,p3,name,cust_id)
select FLOOR(RAND() * 100),FLOOR(RAND() * 1000),FLOOR(RAND() * 100),FLOOR(RAND() * 1000),FLOOR(RAND() * 1000),FLOOR(RAND() * 1000),FLOOR(RAND() * 1000),concat(substring('趙錢孫李周吳鄭王馮陳諸衛蔣沈韓楊朱秦尤許何呂施張孔曹嚴華金魏陶姜戚謝鄒喻柏水竇章雲蘇潘葛奚範彭郎魯韋昌馬苗鳳花方俞任袁柳酆鮑史唐費廉岑薛雷賀倪湯滕殷羅畢郝鄔安常樂於時傅皮齊康伍餘元卜顧孟平黃和穆蕭尹姚邵堪汪祁毛禹狄米貝明臧計伏成戴談宋茅龐熊紀舒屈項祝董粱杜阮藍閔席季麻強賈路婁危江童顏郭梅盛林刁鍾徐邱駱高夏蔡田樊胡凌霍虞萬支柯咎管盧莫經房裘幹解應宗丁宣賁鄧鬱單杭洪包諸左石崔吉鈕龔',floor(1+190*rand()),1),substring('明國華建文平志偉東海強曉生光林小民永傑軍金健一忠洪江福祥中正振勇耀春大寧亮宇興寶少劍雲學仁濤瑞飛鵬安亞澤世漢達衛利勝敏群波成榮新峰剛家龍德慶斌輝良玉俊立浩天巨集子松克清長嘉紅山賢陽樂鋒智青躍元武廣思雄錦威啟昌銘維義宗英凱鴻森超堅旭政傳康繼翔棟仲權奇禮楠煒友年震鑫雷兵萬星駿倫紹麟雨行才希彥兆貴源有景升惠臣慧開章潤高佳虎根遠力進泉茂毅富博霖順信凡豪樹和恩向道川彬柏磊敬書鳴芳培全炳基冠暉京欣廷哲保秋君勁軒帆若連勳祖錫吉崇鈞田石奕發洲彪鋼運伯滿庭申湘皓承梓雪孟其潮冰懷魯裕翰徵謙航士堯標潔城壽楓革純風化逸騰嶽銀鶴琳顯煥來心鳳睿勤延凌昊西羽百捷定琦聖佩麒虹如靖日詠會久昕黎桂瑋燕可越彤雁孝憲萌穎藝夏桐月瑜沛誠夫聲冬奎揚雙坤鎮楚水鐵喜之迪泰方同濱邦先聰朝善非恆晉汝丹為晨乃秀巖辰洋然厚燦卓楊鈺蘭怡靈淇美琪亦晶舒菁真涵爽雅愛依靜棋宜男蔚芝菲露娜珊雯淑曼萍珠詩璇琴素梅玲蕾豔紫珍麗儀夢倩伊茜妍碧芬兒嵐婷菊妮媛蓮娟一',floor(1+400*rand()),1),substring('明國華建文平志偉東海強曉生光林小民永傑軍金健一忠洪江福祥中正振勇耀春大寧亮宇興寶少劍雲學仁濤瑞飛鵬安亞澤世漢達衛利勝敏群波成榮新峰剛家龍德慶斌輝良玉俊立浩天巨集子松克清長嘉紅山賢陽樂鋒智青躍元武廣思雄錦威啟昌銘維義宗英凱鴻森超堅旭政傳康繼翔棟仲權奇禮楠煒友年震鑫雷兵萬星駿倫紹麟雨行才希彥兆貴源有景升惠臣慧開章潤高佳虎根遠力進泉茂毅富博霖順信凡豪樹和恩向道川彬柏磊敬書鳴芳培全炳基冠暉京欣廷哲保秋君勁軒帆若連勳祖錫吉崇鈞田石奕發洲彪鋼運伯滿庭申湘皓承梓雪孟其潮冰懷魯裕翰徵謙航士堯標潔城壽楓革純風化逸騰嶽銀鶴琳顯煥來心鳳睿勤延凌昊西羽百捷定琦聖佩麒虹如靖日詠會久昕黎桂瑋燕可越彤雁孝憲萌穎藝夏桐月瑜沛誠夫聲冬奎揚雙坤鎮楚水鐵喜之迪泰方同濱邦先聰朝善非恆晉汝丹為晨乃秀巖辰洋然厚燦卓楊鈺蘭怡靈淇美琪亦晶舒菁真涵爽雅愛依靜棋宜男蔚芝菲露娜珊雯淑曼萍珠詩璇琴素梅玲蕾豔紫珍麗儀夢倩伊茜妍碧芬兒嵐婷菊妮媛蓮娟一',floor(1+400*rand()),if(rand()>0.6,0,1))),substring(md5(concat(rand(),rand(),rand(),rand())), 1, 30) from m_test_sort;
-- 16384 條記錄
select count(*) from m_test_sort;
三、測試
主鍵忽略
-- 查詢 最多的orgid
select orgid ,count(*) as ct from m_test_sort GROUP BY orgid ORDER BY ct desc limit 10;
3.1 二級索引 ix_orgid(orgid
)
3.1.1 where:orgid,order:id
-- where:orgid索引;order:id索引讀
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by id limit 100;
3.1.2 where:orgid,order:cust_id
-- where:orgid索引;order:回表 cust_id雖然是唯一索引但是還是回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by cust_id limit 100 ;
3.1.3 where:orgid,order:productid
-- where:orgid索引;order:回表 productid 沒有索引
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by productid limit 100 ;
3.1.4 where:orgid,order:catid
-- where:orgid索引;order:回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by catid limit 100;
3.1.5 where:orgid,order:labelid
-- where:orgid索引;order:回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=85 order by labelid limit 100;
3.2 複合索引兩列 ix_labelid_catid(labelid
,catid
)
-- 查詢 最多的 labelid
select labelid ,count(*) as ct from m_test_sort GROUP BY labelid ORDER BY ct desc limit 10;
3.2.1 where:labelid,order:id
-- where:labelid索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by id limit 100;
3.2.2 where:labelid,order:catid
-- where:labelid索引,order:catid 走了複合索引的第二個欄位
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by catid limit 100;
3.2.3 where:labelid,order:labelid
-- where:labelid索引,order:labelid 索引讀
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by labelid limit 100;
3.2.4 where:labelid,order:cust_id
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by cust_id limit 100 ;
3.2.5 where:labelid,order:productid
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by productid limit 100;
3.2.6 where:labelid,order:orgid
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by orgid limit 100;
3.3複合索引三列 ix_p1_p2_p3(p1
,p2
,p3
)
-- 查詢 最多的 labelid
select p1 ,count(*) as ct from m_test_sort GROUP BY p1 ORDER BY ct desc limit 10;
3.3.1 where:p1,order:id
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by id limit 100;
3.3.2 where:p1,order:p1
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p1 limit 100;
3.3.3 where:p1,order:p2
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p2 limit 100;
3.3.4 where:p1,order:p3
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p3 limit 100;
3.3.5 where:p1,p2,order:p3
-- where:p1,p2索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 and p2=32 order by p3 limit 100;
3.3.6 where:p1,order:labelid
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by labelid limit 100;
3.3.7 where:p1,order:cust_id
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by cust_id limit 100 ;
3.3.8 where:p1,order:productid
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by productid limit 100;
3.3.9 where:p1,order:orgid
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by orgid limit 100;
3.3.10 where:p1,order:catid
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by catid limit 100;
3.4 關於TopN 與limit
參考:http://docs.pingcap.com/zh/tidb/stable/topn-limit-push-down
Limit 節點等價於一個排序規則為空的 TopN 節點。
SQL 中的 LIMIT 子句在 TiDB 查詢計劃樹中對應 Limit 運算元節點,ORDER BY 子句在查詢計劃樹中對應 Sort 運算元節點,此外,我們會將相鄰的 Limit 和 Sort 運算元組合成 TopN 運算元節點,表示按某個排序規則提取記錄的前 N 項。從另一方面來說,Limit 節點等價於一個排序規則為空的 TopN 節點。
和謂詞下推類似,TopN(及 Limit,下同)下推將查詢計劃樹中的 TopN 計算儘可能下推到距離資料來源最近的地方,以儘早完成資料的過濾,進而顯著地減少資料傳輸或計算的開銷。
四、總結
4.1 執行計劃彙總
-- 查詢 最多的orgid
select orgid ,count(*) as ct from m_test_sort GROUP BY orgid ORDER BY ct desc limit 10;
-- 二級索引
-- where:orgid索引;order:id索引讀
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by id limit 100 ;
-- where:orgid索引;order:回表 cust_id雖然是唯一索引但是還是回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by cust_id limit 100 ;
-- where:orgid索引;order:回表 productid 沒有索引
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by productid limit 100 ;
-- where:orgid索引;order:回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by catid limit 100;
-- where:orgid索引;order:回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=85 order by labelid limit 100;
---------------
-- 複合索引 2列複合
-- 查詢 最多的 labelid
select labelid ,count(*) as ct from m_test_sort GROUP BY labelid ORDER BY ct desc limit 10;
-- where:labelid索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by id limit 100;
-- where:labelid索引,order:catid 走了複合索引的第二個欄位
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by catid limit 100;
-- where:labelid索引,order:labelid 索引讀
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by labelid limit 100;
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by cust_id limit 100 ;
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by productid limit 100;
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by orgid limit 100;
---------------
-- 複合索引 3列複合
-- 查詢 最多的 labelid
select p1 ,count(*) as ct from m_test_sort GROUP BY p1 ORDER BY ct desc limit 10;
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by id limit 100;
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p1 limit 100;
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p2 limit 100;
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p3 limit 100;
-- where:p1,p2索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 and p2=32 order by p3 limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by labelid limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by cust_id limit 100 ;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by productid limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by orgid limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by catid limit 100;
4.2 sql語句建議
- where 條件儘量走索引
- order by 儘量避免回表
謝謝 TiDB社群,謝謝TiDBer,後續會探索更多的TiDB相關的技術!
「其他文章」
- 你踩過這些坑嗎?謹慎在時間型別列上建立索引
- TiDB和C#的簡單CRUD應用程式
- TiDB VS MySQL
- TIDB監控升級解決panic的漫漫探索之路
- 記憶體悲觀鎖原理淺析與實踐
- TiDB 效能分析&效能調優&優化實踐大全
- TiDB 效能分析和優化
- tiflash 6.0 on k8s擴容與新特性實踐
- 論分散式資料庫TiDB架構的“存”與“算”
- MySQL正常執行的SQL在TiDB中變慢了
- 排查分析Empty regions 較大原因
- 基於tidbV6.0探索索引優化思路
- TiDB HTAP特性的應用場景簡析
- 記憶體悲觀鎖
- 用一個性能提升了666倍的小案例說明在TiDB中正確使用索引的重要性
- TiDB 6.0 新特性解讀 | Collation 規則
- 一個小操作,SQL查詢速度翻了1000倍。
- 一個小操作,SQL查詢速度翻了1000倍。
- Oceanbase和TiDB粗淺對比之 - 執行計劃
- TiKV 縮容不掉如何解決?