基於tidbV6.0探索索引優化思路

語言: CN / TW / HK

作者:邊城元元

原文來源:http://tidb.net/blog/f3ae08ad

一、背景

在分散式資料庫中資料量級大都在千萬以上,用到的最多的sql類似 where ,order by ,limit 的語句。

where,order by的欄位能否同時走索引呢?帶著這個疑問基於 TiDBV6.0 做一個實驗驗證。

二、環境準備

2.1 建立TiDB cluster111

  • 拓撲如下(cluster111-full.yaml)
global:
  user: "tidb"
  ssh_port: 22
  deploy_dir: "/tidb-deploy"
  data_dir: "/tidb-data"
​
# # Monitored variables are applied to all the machines.
monitored:
  node_exporter_port: 9100
  blackbox_exporter_port: 9115
​
server_configs:
  tidb:
    log.slow-threshold: 300
    binlog.enable: false
    binlog.ignore-error: false
  tikv:
    readpool.storage.use-unified-pool: false
    readpool.coprocessor.use-unified-pool: true
  pd:
    schedule.leader-schedule-limit: 4
    schedule.region-schedule-limit: 2048
    schedule.replica-schedule-limit: 64
    replication.location-labels:
      - host
​
pd_servers:
  - host: 10.0.2.15
    # ssh_port: 22
    # name: "pd-1"
    client_port: 2379
    # peer_port: 2380
​
​
tidb_servers:
  - host: 10.0.2.15
​
​
tikv_servers:
  - host: 10.0.2.15
    # ssh_port: 22
    port: 20160
    status_port: 20180
    config:
      server.grpc-concurrency: 4
      #server.labels: {host: "10.0.2.15.20160" }
​
monitoring_servers:
  - host: 10.0.2.15
​
grafana_servers:
  - host: 10.0.2.15
​
alertmanager_servers:
  - host: 10.0.2.15
​
​
# tiup cluster list 
# tiup cluster stop cluster111
# tiup cluster destroy cluster111
​
# 部署cluster111叢集
tiup cluster deploy cluster111 ./cluster111-full.yaml --user root -p
​
tiup cluster start cluster111

image.png

2.2 建立庫表

CREATE DATABASE `b_crm` /*!40100 DEFAULT CHARACTER SET utf8mb4 */ ;
​
drop table if exists `m_test_sort`;
CREATE TABLE `m_test_sort` (
  `id` int(11) NOT NULL AUTO_INCREMENT,
  `orgid` int(11) NOT NULL default 0,
    `labelid` int not null default 0,
    `catid` int not null default 0,
    `productid` int not null default 0,
    `p1` int not null default 0,
    `p2` int not null default 0,
    `p3` int not null default 0,
    `name` varchar(20) not null default '',
    `cust_id` char(30) not null default '', 
  PRIMARY KEY (`id`) ,
    key ix_orgid(`orgid`),
    key ix_labelid_catid(`labelid`,`catid`),
    key ix_p1_p2_p3(`p1`,`p2`,`p3`),
    UNIQUE KEY uix_cust_id(`cust_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;
​

2.3 表資料初始化

-- 插入1條 seed
insert into m_test_sort(orgid,labelid,catid,productid,p1,p2,p3,name,cust_id)values(1,1,1,1,1,1,1,'111','111');
​
-- 執行多次14次
insert into m_test_sort(orgid,labelid,catid,productid,p1,p2,p3,name,cust_id)
select  FLOOR(RAND() * 100),FLOOR(RAND() * 1000),FLOOR(RAND() * 100),FLOOR(RAND() * 1000),FLOOR(RAND() * 1000),FLOOR(RAND() * 1000),FLOOR(RAND() * 1000),concat(substring('趙錢孫李周吳鄭王馮陳諸衛蔣沈韓楊朱秦尤許何呂施張孔曹嚴華金魏陶姜戚謝鄒喻柏水竇章雲蘇潘葛奚範彭郎魯韋昌馬苗鳳花方俞任袁柳酆鮑史唐費廉岑薛雷賀倪湯滕殷羅畢郝鄔安常樂於時傅皮齊康伍餘元卜顧孟平黃和穆蕭尹姚邵堪汪祁毛禹狄米貝明臧計伏成戴談宋茅龐熊紀舒屈項祝董粱杜阮藍閔席季麻強賈路婁危江童顏郭梅盛林刁鍾徐邱駱高夏蔡田樊胡凌霍虞萬支柯咎管盧莫經房裘幹解應宗丁宣賁鄧鬱單杭洪包諸左石崔吉鈕龔',floor(1+190*rand()),1),substring('明國華建文平志偉東海強曉生光林小民永傑軍金健一忠洪江福祥中正振勇耀春大寧亮宇興寶少劍雲學仁濤瑞飛鵬安亞澤世漢達衛利勝敏群波成榮新峰剛家龍德慶斌輝良玉俊立浩天巨集子松克清長嘉紅山賢陽樂鋒智青躍元武廣思雄錦威啟昌銘維義宗英凱鴻森超堅旭政傳康繼翔棟仲權奇禮楠煒友年震鑫雷兵萬星駿倫紹麟雨行才希彥兆貴源有景升惠臣慧開章潤高佳虎根遠力進泉茂毅富博霖順信凡豪樹和恩向道川彬柏磊敬書鳴芳培全炳基冠暉京欣廷哲保秋君勁軒帆若連勳祖錫吉崇鈞田石奕發洲彪鋼運伯滿庭申湘皓承梓雪孟其潮冰懷魯裕翰徵謙航士堯標潔城壽楓革純風化逸騰嶽銀鶴琳顯煥來心鳳睿勤延凌昊西羽百捷定琦聖佩麒虹如靖日詠會久昕黎桂瑋燕可越彤雁孝憲萌穎藝夏桐月瑜沛誠夫聲冬奎揚雙坤鎮楚水鐵喜之迪泰方同濱邦先聰朝善非恆晉汝丹為晨乃秀巖辰洋然厚燦卓楊鈺蘭怡靈淇美琪亦晶舒菁真涵爽雅愛依靜棋宜男蔚芝菲露娜珊雯淑曼萍珠詩璇琴素梅玲蕾豔紫珍麗儀夢倩伊茜妍碧芬兒嵐婷菊妮媛蓮娟一',floor(1+400*rand()),1),substring('明國華建文平志偉東海強曉生光林小民永傑軍金健一忠洪江福祥中正振勇耀春大寧亮宇興寶少劍雲學仁濤瑞飛鵬安亞澤世漢達衛利勝敏群波成榮新峰剛家龍德慶斌輝良玉俊立浩天巨集子松克清長嘉紅山賢陽樂鋒智青躍元武廣思雄錦威啟昌銘維義宗英凱鴻森超堅旭政傳康繼翔棟仲權奇禮楠煒友年震鑫雷兵萬星駿倫紹麟雨行才希彥兆貴源有景升惠臣慧開章潤高佳虎根遠力進泉茂毅富博霖順信凡豪樹和恩向道川彬柏磊敬書鳴芳培全炳基冠暉京欣廷哲保秋君勁軒帆若連勳祖錫吉崇鈞田石奕發洲彪鋼運伯滿庭申湘皓承梓雪孟其潮冰懷魯裕翰徵謙航士堯標潔城壽楓革純風化逸騰嶽銀鶴琳顯煥來心鳳睿勤延凌昊西羽百捷定琦聖佩麒虹如靖日詠會久昕黎桂瑋燕可越彤雁孝憲萌穎藝夏桐月瑜沛誠夫聲冬奎揚雙坤鎮楚水鐵喜之迪泰方同濱邦先聰朝善非恆晉汝丹為晨乃秀巖辰洋然厚燦卓楊鈺蘭怡靈淇美琪亦晶舒菁真涵爽雅愛依靜棋宜男蔚芝菲露娜珊雯淑曼萍珠詩璇琴素梅玲蕾豔紫珍麗儀夢倩伊茜妍碧芬兒嵐婷菊妮媛蓮娟一',floor(1+400*rand()),if(rand()>0.6,0,1))),substring(md5(concat(rand(),rand(),rand(),rand())), 1, 30) from m_test_sort;
​
-- 16384 條記錄
select count(*) from m_test_sort;
​

image.png

三、測試

主鍵忽略

-- 查詢 最多的orgid
select orgid ,count(*) as ct from m_test_sort GROUP BY orgid ORDER BY ct desc limit 10;
​

image.png

3.1 二級索引 ix_orgid(orgid)

3.1.1 where:orgid,order:id

-- where:orgid索引;order:id索引讀
EXPLAIN ANALYZE  select id from m_test_sort where orgid=8 order by id limit 100;

image.png

3.1.2 where:orgid,order:cust_id

-- where:orgid索引;order:回表 cust_id雖然是唯一索引但是還是回表
EXPLAIN ANALYZE  select id from m_test_sort where orgid=8 order by cust_id limit 100  ;

image.png

3.1.3 where:orgid,order:productid

-- where:orgid索引;order:回表  productid 沒有索引
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by productid limit 100 ;

image.png

3.1.4 where:orgid,order:catid

-- where:orgid索引;order:回表  
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by catid limit 100;

image.png

3.1.5 where:orgid,order:labelid

-- where:orgid索引;order:回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=85 order by labelid  limit 100;

image.png

3.2 複合索引兩列 ix_labelid_catid(labelid,catid)

-- 查詢 最多的 labelid
select labelid ,count(*) as ct from m_test_sort GROUP BY labelid ORDER BY ct desc limit 10;

image.png

3.2.1 where:labelid,order:id

-- where:labelid索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by id  limit 100; 

image.png

3.2.2 where:labelid,order:catid

-- where:labelid索引,order:catid 走了複合索引的第二個欄位
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by catid  limit 100;

image.png

3.2.3 where:labelid,order:labelid

-- where:labelid索引,order:labelid 索引讀
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by labelid  limit 100;

image.png

3.2.4 where:labelid,order:cust_id

-- where:labelid索引,order:回表
EXPLAIN ANALYZE  select id from m_test_sort where labelid=962 order by cust_id limit 100  ;

image.png

3.2.5 where:labelid,order:productid

-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by productid  limit 100;

image.png

3.2.6 where:labelid,order:orgid

-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by orgid limit 100;

image.png

3.3複合索引三列 ix_p1_p2_p3(p1,p2,p3)

-- 查詢 最多的 labelid
select p1 ,count(*) as ct from m_test_sort GROUP BY p1 ORDER BY ct desc limit 10;

image.png

3.3.1 where:p1,order:id

-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by id  limit 100;  

image.png

3.3.2 where:p1,order:p1

-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p1  limit 100;

image.png

3.3.3 where:p1,order:p2

-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p2  limit 100;

image.png

3.3.4 where:p1,order:p3

-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p3  limit 100;

image.png

3.3.5 where:p1,p2,order:p3

-- where:p1,p2索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 and p2=32 order by p3  limit 100;

image.png

3.3.6 where:p1,order:labelid

-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by labelid  limit 100;

image.png

3.3.7 where:p1,order:cust_id

-- where:p1索引,order:回表
EXPLAIN ANALYZE  select id from m_test_sort where p1=346 order by cust_id limit 100  ;

image.png

3.3.8 where:p1,order:productid

-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by productid  limit 100;

3.3.9 where:p1,order:orgid

-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by orgid limit 100;

image.png

3.3.10 where:p1,order:catid

-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by catid  limit 100;

image.png

3.4 關於TopN 與limit

參考:http://docs.pingcap.com/zh/tidb/stable/topn-limit-push-down

Limit 節點等價於一個排序規則為空的 TopN 節點。

SQL 中的 LIMIT 子句在 TiDB 查詢計劃樹中對應 Limit 運算元節點,ORDER BY 子句在查詢計劃樹中對應 Sort 運算元節點,此外,我們會將相鄰的 Limit 和 Sort 運算元組合成 TopN 運算元節點,表示按某個排序規則提取記錄的前 N 項。從另一方面來說,Limit 節點等價於一個排序規則為空的 TopN 節點。

和謂詞下推類似,TopN(及 Limit,下同)下推將查詢計劃樹中的 TopN 計算儘可能下推到距離資料來源最近的地方,以儘早完成資料的過濾,進而顯著地減少資料傳輸或計算的開銷。

四、總結

4.1 執行計劃彙總

-- 查詢 最多的orgid
select orgid ,count(*) as ct from m_test_sort GROUP BY orgid ORDER BY ct desc limit 10;
​
-- 二級索引
-- where:orgid索引;order:id索引讀
EXPLAIN ANALYZE  select id from m_test_sort where orgid=8 order by id limit 100  ;
-- where:orgid索引;order:回表 cust_id雖然是唯一索引但是還是回表
EXPLAIN ANALYZE  select id from m_test_sort where orgid=8 order by cust_id limit 100  ;
-- where:orgid索引;order:回表  productid 沒有索引
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by productid limit 100 ;
-- where:orgid索引;order:回表  
EXPLAIN ANALYZE select id from m_test_sort where orgid=8 order by catid limit 100;
-- where:orgid索引;order:回表
EXPLAIN ANALYZE select id from m_test_sort where orgid=85 order by labelid  limit 100;
​
​
​
--------------- 
-- 複合索引 2列複合
-- 查詢 最多的 labelid
select labelid ,count(*) as ct from m_test_sort GROUP BY labelid ORDER BY ct desc limit 10;
-- where:labelid索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by id  limit 100;    
-- where:labelid索引,order:catid 走了複合索引的第二個欄位
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by catid  limit 100;
-- where:labelid索引,order:labelid 索引讀
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by labelid  limit 100;
-- where:labelid索引,order:回表
EXPLAIN ANALYZE  select id from m_test_sort where labelid=962 order by cust_id limit 100  ;
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by productid  limit 100;
-- where:labelid索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where labelid=962 order by orgid limit 100;
​
--------------- 
-- 複合索引 3列複合
-- 查詢 最多的 labelid
select p1 ,count(*) as ct from m_test_sort GROUP BY p1 ORDER BY ct desc limit 10;
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by id  limit 100;    
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p1  limit 100;
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p2  limit 100;
-- where:p1索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by p3  limit 100;
-- where:p1,p2索引,order:索引讀
EXPLAIN ANALYZE select id from m_test_sort where p1=346 and p2=32 order by p3  limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by labelid  limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE  select id from m_test_sort where p1=346 order by cust_id limit 100  ;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by productid  limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by orgid limit 100;
-- where:p1索引,order:回表
EXPLAIN ANALYZE select id from m_test_sort where p1=346 order by catid  limit 100;
​

4.2 sql語句建議

  • where 條件儘量走索引
  • order by 儘量避免回表

謝謝 TiDB社群,謝謝TiDBer,後續會探索更多的TiDB相關的技術!