Pyecharts結合Flask案例 雙色球資料分析

語言: CN / TW / HK

theme: cyanosis highlight: default


我正在參加「掘金·啟航計劃」

近段時間又迷上了雙色球,即使已經知道這是一個隨機過程。曾經一個朋友說過:一注彩票付出兩塊錢,帶來的是對生活的一份期待。當然研究彩票不是目的而是手段,主要還是藉此學習一下視覺化庫pyecharts。

官方文件 pyecharts - A Python Echarts Plotting Library built with love.
圖表Demo - Document (pyecharts.org)

資料抓取

資料來源:
http://datachart.500.com/ssq/history/history.shtml
F12發現數據請求介面網址是:
http://datachart.500.com/ssq/history/newinc/history.php?start=22085&end=22114

返回的是html文字,結果如下,我們需要解析的資料位於<tbody id="tdata">

image.png

這裡通過xpath來提取(也可以用BeautifulSoup)。首先找到id為tdata的tbody標籤,然後選取class屬性為t_tr1的tr子節點,再遍歷這些tr節點,取出對應的期數+6個紅球+1個藍球+開獎日期這9列。不知道是不是編碼的原因,提取出來的文字型別為lxml.etree._ElementUnicodeResult,需要將它轉換為字串。

詳細程式碼:

```python def fetch_data(start: int = None, end: int = '', limit: int = None) -> List[list]: """抓取並解析資料""" base_url = "http://datachart.500.com/ssq/history/newinc/history.php" url = f"{base_url}?start={start}&end={end}" if limit: url = f"{base_url}?limit={limit}&sort=0"

res = requests.get(url)
html = etree.HTML(res.content.decode("utf-8"))
# 解析為字串,並按照utf8解碼
# doc = etree.tostring(html, pretty_print=True, encoding="utf-8").decode('utf-8')
el = html.xpath("//tbody[@id='tdata']/tr[@class='t_tr1']")
result = []
for e in el:
    # 提取指定位置的td
    # 用這種方式也可:e.xpath("./td[not(text()='\xa0')]/text()"),其中\xa0表示不間斷空白符,即&nbsp;
    temp = e.xpath("./td[not(position()>=9 and position()<=last()-1)]/text()")
    # 提取出來的文字型別為lxml.etree._ElementUnicodeResult,需要轉換為字串
    result.append(list(map(str, temp)))
# df = data_processing(result)
return result

```

資料處理

接下來將上一步返回的資料列表轉換為Dataframe,為了方便後續分析,日期列設為索引,資料型別也要做相應的轉換,並且新構建了兩個特徵:奇偶次數和連號個數。
詳細程式碼: ``` @classmethod def data_processing(cls, data: List[list]) -> pd.DataFrame: df = pd.DataFrame(data, columns=['period'] + cls.cols + ['date']).sort_values('date') df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d') df.set_index('date', inplace=True) # 轉換為int型別 df = df.astype({i: int for i in cls.cols}) return cls.build_feature(df)

@classmethod
def build_feature(cls, data: pd.DataFrame) -> pd.DataFrame:
    """Add the ``odd`` and ``continuous`` feature columns to *data*.

    ``odd`` is the number of odd values among the ``cls.cols`` columns of a
    row. ``continuous`` is the length of the longest run of values that each
    exceed the previous one by exactly 1, scanning the columns in
    ``cls.cols`` order (1 when no two neighbours are consecutive).

    Args:
        data: Frame containing the integer columns named in ``cls.cols``.

    Returns:
        The same ``data`` object, modified in place with the two new columns.
    """

    def longest_run(values: list) -> int:
        """Return the longest streak of +1 steps in *values* (0 if empty)."""
        if not values:
            return 0
        best = current = 1
        for previous, value in zip(values, values[1:]):
            current = current + 1 if value - previous == 1 else 1
            best = max(best, current)
        return best

    balls = data[cls.cols]
    # Vectorised parity count; avoids a Python-level loop per row.
    data['odd'] = (balls % 2 == 1).sum(axis=1)
    # Work on plain lists: positional indexing of a label-indexed row Series
    # (x[0], x[i]) is deprecated in pandas.
    data['continuous'] = [longest_run(row) for row in balls.to_numpy().tolist()]

    return data

```

繪製圖表

此前前端vue專案用過echarts庫,所以對其中的配置引數還有點印象,官方文件也很詳細,包含各種圖表的示例demo。

關於我第一次使用pyecharts遇到的坑:
1. 若使用jupyter環境,必須宣告環境型別,否則圖表無法顯示。

   ```python
   # jupyter notebook環境
   from pyecharts.globals import ThemeType, CurrentConfig, NotebookType

   CurrentConfig.NOTEBOOK_TYPE = NotebookType.JUPYTER_NOTEBOOK
   ```

2. pyecharts不支援pandas中的資料型別,可以通過`tolist()`方法轉換
3. `Page`元件中無法嵌入`Tab`元件,會報錯找不到chart_id
4. 以百分比形式為Tab元件內嵌的Line折線圖設定圖表寬度,無法正常顯示,而px單位可以(不知道是不是bug)
5. 圖表渲染方式:
   - 生成html檔案:`render()`
   - jupyter notebook環境:`render_notebook()`
   - Flask或Django直接渲染:`render_embed()`

繪圖相關程式碼:

由於建立圖表基本上都涉及全域性配置和初始化配置項,自定義了兩個方法方便呼叫

```python
import pyecharts.options as opts
from pyecharts.globals import ThemeType

def update_global_opts(chart, title_opts=None, xaxis_opts=None, yaxis_opts=None, legend_opts=None, x_rotate=None):
    """Build the keyword arguments for ``chart.set_global_opts(**...)``.

    Args:
        chart: Not used by this function; kept so existing callers that pass
            the chart positionally keep working.
        title_opts: Keyword arguments for ``opts.TitleOpts``; omitted from
            the result when falsy.
        xaxis_opts: Extra keyword arguments for the x-axis ``opts.AxisOpts``.
            Keys given here override the defaults (split lines shown, labels
            rotated by ``x_rotate``).
        yaxis_opts: Extra keyword arguments for the y-axis ``opts.AxisOpts``.
            Keys given here override the default (split lines shown).
        legend_opts: Keyword arguments for ``opts.LegendOpts``; omitted from
            the result when falsy.
        x_rotate: Rotation passed to the x-axis ``opts.LabelOpts``.

    Returns:
        A dict with ``xaxis_opts``, ``yaxis_opts``, ``tooltip_opts`` and
        ``toolbox_opts``, plus ``title_opts`` / ``legend_opts`` when given.
    """
    # Merge defaults with caller overrides in a dict first: passing both as
    # keywords would raise "got multiple values for keyword argument".
    x_kwargs = {
        'splitline_opts': opts.SplitLineOpts(is_show=True),
        'axislabel_opts': opts.LabelOpts(rotate=x_rotate),
        **(xaxis_opts or {}),
    }
    y_kwargs = {
        'splitline_opts': opts.SplitLineOpts(is_show=True),
        **(yaxis_opts or {}),
    }

    dic = dict(
        xaxis_opts=opts.AxisOpts(**x_kwargs),
        yaxis_opts=opts.AxisOpts(**y_kwargs),
        tooltip_opts=opts.TooltipOpts(trigger="axis", axis_pointer_type="cross"),
        toolbox_opts=opts.ToolboxOpts(
            is_show=True,
            orient='horizontal',
            feature={
                "saveAsImage": {},
                "dataZoom": {"yAxisIndex": "none"},
                "restore": {},
                "magicType": {"show": True, "type": ["line", "bar"]},
                "dataView": {},
            },
        ),
    )

    # Title configuration
    if title_opts:
        dic['title_opts'] = opts.TitleOpts(**title_opts)
    # Legend configuration
    if legend_opts:
        dic['legend_opts'] = opts.LegendOpts(**legend_opts)

    return dic

def update_init_opts(width="900px", theme=None, bg_color=None): dic = dict( width=width, theme=ThemeType.WESTEROS, # bg_color='rgba(48, 56, 69, 0.9)', ) if theme: dic['theme'] = theme if bg_color: dic['bg_color'] = bg_color init_opts = opts.InitOpts(**dic) return init_opts ```

具體繪圖程式碼: ```python def draw_trend(self) -> Page: """繪製各號碼走勢曲線及出現次數分析""" data = self.data page = Page(layout=Page.DraggablePageLayout, page_title='實時彩票分析平臺') start, end = data.index[0].strftime("%Y-%m-%d"), data.index[-1].strftime("%Y-%m-%d")

for col in self.cols:
    count = data[col].value_counts().sort_index()
    bar = (
        Bar(init_opts=update_init_opts())
            .add_yaxis('出現次數', count.values.tolist())
            .add_xaxis(count.index.tolist())
            .reversal_axis()
    )

    line = (
        Line(init_opts=update_init_opts())
            .add_xaxis(data.index.strftime("%m-%d").tolist())
            .add_yaxis('號碼', data[col].values.tolist(), linestyle_opts=opts.LineStyleOpts(width=2))
    )

    # 'pos_left': '2%'表示右邊留白2%
    global_opts_bar = update_global_opts(bar,
                                         yaxis_opts={'type_': 'category'},
                                         title_opts={'title': f'【{start}至{end}】{col}次數', 'pos_left': '2%'},
                                         legend_opts={'pos_left': '25%', 'is_show': False})
    global_opts_line = update_global_opts(line,
                                          x_rotate=45,
                                          title_opts={'title': f'{col}號碼走勢', 'pos_left': '40%'},
                                          legend_opts={'pos_right': '25%', 'is_show': False}, )

    line.set_global_opts(**global_opts_line)
    bar.set_global_opts(**global_opts_bar)

    grid = (
        Grid(init_opts=update_init_opts(width='100%'))
            .add(bar, grid_opts=opts.GridOpts(pos_right="75%"))
            .add(line, grid_opts=opts.GridOpts(pos_left="35%"))
    )

    page.add(grid)
return page

def draw_features(self) -> Line:
    """Plot the ``odd`` and ``continuous`` columns of ``self.data`` as two line series.

    Returns:
        A full-width ``Line`` chart whose x-axis is the index formatted as
        ``MM-DD``, with the global options from ``update_global_opts``
        (x labels rotated 30 degrees) applied.
    """
    data = self.data
    line = (
        Line(init_opts=update_init_opts(width='100%'))
        .add_xaxis(data.index.strftime("%m-%d").tolist())
        .add_yaxis('奇數個數', data['odd'].values.tolist())
        .add_yaxis('連號個數', data['continuous'].values.tolist())
    )

    line.set_global_opts(**update_global_opts(line, x_rotate=30))

    return line

```

結合Flask

步驟:

- 建立Flask專案
- 找到pyecharts庫安裝位置,拷貝路徑pyecharts.render.templates中的檔案至專案templates目錄中
- 設定環境變數(pyecharts的全域模板環境`GLOBAL_ENV`)

  ```python
  CurrentConfig.GLOBAL_ENV = Environment(loader=FileSystemLoader("./templates"))
  ```

- 執行

按照官網文件中from jinja2 import Markup會報錯,改為from jinja2.utils import markupsafe

Flask app.py檔案: ```python from typing import List import requests

from flask import Flask, request, redirect, url_for from jinja2.utils import markupsafe from jinja2 import Environment, FileSystemLoader from lxml import etree

from pyecharts.globals import CurrentConfig CurrentConfig.GLOBAL_ENV = Environment(loader=FileSystemLoader("./templates"))

from lottery_predict import Lottery

app = Flask(name, static_folder="templates")

@app.route('/') def index(): return redirect(url_for('trend'))

@app.route('/trend') def trend(): limit = request.args.get('limit') if not limit: limit = 30 data = fetch_data(limit=int(limit)) chart = Lottery(data).draw_trend() return markupsafe.Markup(chart.render_embed())

@app.route('/features') def draw_charts(): limit = request.args.get('limit') if not limit: limit = 30 data = fetch_data(limit=int(limit)) chart = Lottery(data).draw_features() return markupsafe.Markup(chart.render_embed())

if name == 'main': app.run() ``` 執行截圖:


image.png


image.png

總結

至此接觸過matplotlib、plotly、plotly express、dash、seaborn、pyecharts,特點如下:

| matplotlib | plotly | plotly express | dash | seaborn | pyecharts |
| --- | --- | --- | --- | --- | --- |
| 配置靈活<br>文件完善<br>互動性差<br>程式碼量多 | 程式碼量較多<br>文件不太友好 | plotly進一步封裝,支援動態互動,文件不太友好 | 同樣是plotly生態一員,支援web框架整合 | 程式碼量少,圖表比matplotlib好看,也不支援互動 | 程式碼量較多<br>文件完善<br>樣式豐富 |

seaborn

奇偶性分析.png

plotly express

newplot (2).png