python脚本第一弹——Excel处理
本文旨在为不熟悉python的开发人员提供excel便捷处理工具,以方便日常工作。
一、python安装
进入http://www.python.org/downloads/
,根据操作系统版本选择对应安装包。建议安装3.x版本。
安装过程注意勾选 Add to path
,将python路径信息
添加到系统环境变量PATH
中。
IDE工具建议安装jetbrains提供的pycharm社区版。
二、Excel介绍
电子表格工具,后缀为 .xlsx。
一个Excel文档为一个工作簿,工作簿可以包含多个sheet,一个sheet为一个工作表。
三、安装openpyxl模块
在DOS命令窗口执行:
pip install openpyxl==2.6.2
四、Excel处理
本模块代码及实验文件均已上传Github。
仓库地址: http://github.com/WWindmill/pythonScript
代码目录:
4.1 读取操作
``` import openpyxl from openpyxl.utils import get_column_letter, column_index_from_string
工作簿对象
workbook = openpyxl.load_workbook(".\source\automate_online-materials\example.xlsx") print('type of result: ', type(workbook)) print("all sheet names: ", workbook.sheetnames)
获取工作表
sheet = workbook['Sheet1'] print("sheet obj: ", sheet, " sheet title:", sheet.title)
获取工作簿的活动表
anotherSheet = workbook.active print("active sheet: ", anotherSheet)
获取单元格
print("cell A1: ", sheet['A1']) print("cell A1 val: ", sheet['A1'].value) c = sheet['B1'] print('Row %s, Column %s is %s' % (c.row, c.column, c.value)) print('Cell %s is %s' % (c.coordinate, c.value)) print("cell[B1]: ", sheet.cell(row=1, column=2))
步长为2
for i in range(1, 8, 2): print('row:%s,column:2, value:%s' % (i, sheet.cell(row=i, column=2).value))
获取工作表大小
print("max row: ", sheet.max_row) print("max column: ", sheet.max_column)
列字母与数字转换
print("1 mean letter: ", get_column_letter(1)) print(sheet.max_column, "mean letter: ", get_column_letter(sheet.max_column)) print("column A point at num: ", column_index_from_string('A'))
按行遍历 method1
print(tuple(sheet['A1':'C3'])) for rowCell in sheet['A1':'C3']: for eachCell in rowCell: print(eachCell.coordinate, eachCell.value) print('--- END OF ROW ---')
按行遍历 method2
print(list(sheet.rows)[0]) for cellObj in list(sheet.rows)[0]: print(cellObj.value) print('--- END OF ROW ---')
按列遍历
print(list(sheet.columns)[0]) for cellObj in list(sheet.columns)[0]: print(cellObj.value) print('--- END OF column ---') ```
4.2 写操作
``` import openpyxl
工作簿对象
workbook = openpyxl.load_workbook(".\source\example.xlsx") print('type of result: ', type(workbook)) print("all sheet names: ", workbook.sheetnames)
获取工作表
sheet = workbook['Sheet1'] print("sheet obj: ", sheet, " sheet title:", sheet.title)
修改sheet名称 并转储为另一个文件
sheet.title = 'Spam Spam Spam' workbook.save('.\source\example_copy.xlsx')
创建和删除工作表
workbook.create_sheet(index=3, title="the fourth sheet") print('sheet names: ', workbook.sheetnames) del workbook['the fourth sheet'] print('sheet names: ', workbook.sheetnames) workbook.save('.\source\example_copy.xlsx')
修改单元格属性值
sheet['B1'] = 'Hello, world!' print('B1 modified value: ', sheet['B1'].value) workbook.save('.\source\example_copy.xlsx') ```
4.3 其他操作
``` import openpyxl
公式
workbookCal = openpyxl.Workbook() sheet = workbookCal.active sheet['A1'] = 200 sheet['A2'] = 300
设置公式.
sheet['A3'] = '=SUM(A1:A2)' workbookCal.save('.\source\writeFormula.xlsx')
行、列操作
workbookOpt = openpyxl.Workbook() sheetOpt = workbookOpt.active sheetOpt['A1'] = 'Tall row' sheetOpt['B2'] = 'Wide column'
设置宽高
sheetOpt.row_dimensions[1].height = 70 sheetOpt.column_dimensions['B'].width = 20
合并单元格
sheetOpt.merge_cells('A1:D3') sheetOpt['A1'] = 'Twelve cells merged together.' sheetOpt.merge_cells('C5:D5') sheetOpt['C5'] = 'Two merged cells.' workbookOpt.save('.\source\dimensions.xlsx')
分拆单元格
sheetOpt.unmerge_cells('C5:D5') workbookOpt.save('.\source\dimensions.xlsx')
冻结窗口
sheetOpt.freeze_panes = 'C5' workbookOpt.save('.\source\dimensions.xlsx')
图表
workbookDraw = openpyxl.Workbook() sheetDraw = workbookDraw.active for i in range(1, 11): sheetDraw['A' + str(i)] = i refObj = openpyxl.chart.Reference(sheet, min_col=1, min_row=1,max_col=1, max_row=10) seriesObj = openpyxl.chart.Series(refObj, title='First series') chartObj = openpyxl.chart.BarChart() chartObj.title = 'My Chart' chartObj.append(seriesObj) sheetDraw.add_chart(chartObj, 'C5') workbookDraw.save('.\source\sampleChart.xlsx') ```
五、综合实践
根据如下表结构统计各县人口总数以及普查区数,并输出为Json文件。
| CensusTract | State | County | POP | | --- | --- | --- | --- | | ... | ... | ... | ... |
其中: - CensusTract表示普查区编号 - State表示州简称 - County表示县名称 - POP表示普查区人口数
文件下载地址:censuspopdata.xlsx
实现代码如下: ``` import openpyxl, pprint
print('Opening workbook...') workbook = openpyxl.load_workbook('.\source\censuspopdata.xlsx') sheet = workbook['Population by Census Tract'] countyData = {} print('Reading rows...') for row in range(2, sheet.max_row + 1): # Each row in the spreadsheet has data for one census tract. state = sheet['B' + str(row)].value county = sheet['C' + str(row)].value pop = sheet['D' + str(row)].value # Make sure the key for this state exists.if already exist, execute nothing. countyData.setdefault(state, {}) # Make sure the key for this county in this state exists.if already exist, execute nothing. countyData[state].setdefault(county, {'tracts': 0, 'pop': 0}) # Each row represents one census tract, so increment by one. countyData[state][county]['tracts'] += 1 # Increase the county pop by the pop in this census tract. countyData[state][county]['pop'] += int(pop)
Open a new text file and write the contents of countyData to it.
print('Writing results...') resultFile = open('.\source\census.json', 'w') resultFile.write('allData = ' + pprint.pformat(countyData)) resultFile.close() print(pprint.pformat(countyData))
print Anchorage population
print(countyData['AK']['Anchorage']['pop'])
print('Done.') ```
返回如下:
{
"AK": {
"Aleutians East": {
"pop": 3141,
"tracts": 1
},
"Aleutians West": {
"pop": 5561,
"tracts": 2
},
"Anchorage": {
"pop": 291826,
"tracts": 55
},
"Bethel": {
"pop": 17013,
"tracts": 3
},
"Bristol Bay": {
"pop": 997,
"tracts": 1
},
"Denali": {
"pop": 1826,
"tracts": 1
},
"Dillingham": {
"pop": 4847,
"tracts": 2
},
...