抱歉,您的浏览器无法访问本站
本页面需要浏览器支持(启用)JavaScript
了解详情 >

[toc]

python 表格操作

将awk生成的数据导入表格


pandas :pannel data analysis(面板数据分析)。pandas是基于numpy构建的,为时间序列分析提供了很好的支持。pandas中有两个主要的数据结构,一个是Series,另一个是DataFrame。

Series 类似于一维数组与字典(map)数据结构的结合。它由一组数据和一组与数据相对应的数据标签(索引index)组成。这组数据和索引标签的基础都是一个一维ndarray数组。可将index索引理解为行索引。 Series的表现形式为:索引在左,数据在右。

DataFrame是一个类似表格的数据结构,索引包括列索引和行索引,包含有一组有序的列,每列可以是不同的值类型(数值、字符串、布尔值等)。DataFrame的每一行和每一列都是一个Series,这个Series的name属性为当前的行索引名/列索引名。

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import openpyxl as xl
import pandas as pd
import numpy as np
import os
import datetime

cycle=38
time_year_month_day = str(datetime.date.today()-datetime.timedelta(days=cycle))
def proj_xls_to_xlsx(file_path,sheet_name,save_name,tableTitle=None):
    with open(file_path) as f:
        data = f.readlines()
    if not os.path.exists(save_name):
        wb = xl.Workbook()
        wb.save(save_name)
    else:
        wb = xl.load_workbook(save_name)
    ws1 = wb.create_sheet(0)
    ws1.title = sheet_name
    if tableTitle != None:
        for n in range(len(tableTitle)):
            c = n + 1
            ws1.cell(row=1, column=c).value = tableTitle[n]
    for d in data:
        value = d.split('\t')
        try:

           # print(value)
            value[5]=float(value[5])
        except:
            pass
        ws1.append(value)
    wb.save(save_name)

def crate_shuju_toushi(save_name,sheet_name,sheet_name2,index_name,values_name,department_name,new_department_name,personnel_name):
    f = pd.read_excel(io=save_name,sheet_name=sheet_name)
    res = pd.pivot_table(f,index=[index_name],values=[values_name],aggfunc=np.sum, margins=True)

    wb = xl.load_workbook(save_name)
    old_title = wb.worksheets[0]
    old_title.title = sheet_name2

    all_xinxi_title=list(zip(list((res.index)),list(res[values_name])))
    department_personnel = list(zip(f[department_name],f[index_name]))
    department_personnel_size_list = []


    for user in all_xinxi_title:
         for department_personnel_list in department_personnel:
             if user[0] in department_personnel_list:
                 if user[0] == '-':
                     continue
                 if user[0] == ' ':
                     continue
                 department_personnel_size_list.append((department_personnel_list[0],user[0],user[1]))

    end_department_personnel_size_list = sorted(list(set(department_personnel_size_list)), key=lambda x:x[2] ,reverse=True)
    all_xinxi_title_end = all_xinxi_title[-1]
    all_xinxi_title_end = list(all_xinxi_title_end)
    all_xinxi_title_end.insert(0,'')
    end_department_personnel_size_list.append(all_xinxi_title_end)
    end_department_personnel_size_list.insert(0,[new_department_name,personnel_name,values_name])
    end_department_personnel_size_list.pop()

    user_volue = []
    for user2 in end_department_personnel_size_list:
        user2 = list(user2)
        if user2[1] == user_volue:
            continue
        user_volue = user2[1]
        old_title.append(user2)
    wb.save(save_name)

def create_space_toushi(save_name,sheet_name,sheet_name2,department_num,sum_name,data_total,index_name,values_name):
    f = pd.read_excel(io=save_name,sheet_name=sheet_name,dtype={'业务XX利润编号':str})
    res = pd.pivot_table(f,index=index_name,values=values_name,aggfunc=np.sum,margins=True)
    all_list = list(zip(list(res.index),list(res[values_name])))

    wb = xl.load_workbook(save_name)
    sheet = wb.create_sheet(sheet_name2)

    space_list = []
    for user in all_list:
        user2 = list(user)[1]
        user3 = list(list(user)[0])
        if user3[0] == '-':
            space_list.append((user3[1],user2))
        if user3[0] == ' ':
            space_list.append((user3[1],user2))
    sum_list = []
    for slist in space_list:
        sum_list.append(slist[1])
    sum_list=sum(sum_list)
    sort_space_list = sorted(list(set(space_list)), key=lambda x:x[1] ,reverse=True)
    sort_space_list.append((sum_name, sum_list))
    sort_space_list.insert(0,(department_num,data_total))
    for all_space_list in sort_space_list:
        sheet.append(all_space_list)
    wb.save(save_name)

if __name__=='__main__':
    sheet_name = 'XXX详情'
    save_name = 'TJ-XJ-DATASHOW-' + time_year_month_day + '.xlsx'
    save_name = 'TJ-XJ-DATASHOW-' + time_year_month_day + '.xlsx'
    file_path = './able.' + time_year_month_day +'.lib.xls'
    proj_xls_to_xlsx(file_path=file_path,
                     sheet_name=sheet_name,
                     save_name=save_name)

    sheet_name = 'XXX总体情况'
    save_name = 'TJ-XJ-DATASHOW-' +  time_year_month_day + '.xlsx'
    file_path = './' + time_year_month_day + '.ProjInfo.XJpath.xls'
    table_title = ['业务XX名称','业务XX利润编号',
                   '运营经理','信息分析','涉及项目数',
                   '涉及数据量大小(G)','项目编号1',
                   '项目名称1','数据量(G)1',
                   '文库编号1','项目编号2',
                   '项目名称2','数据量(G)2',
                   '文库编号2','项目编号3',
                   '项目名称3','数据量(G)3',
                   '文库编号3','项目编号4',
                   '项目名称4   数据量(G)4','文库编号4',]
    proj_xls_to_xlsx(file_path=file_path,
                     sheet_name=sheet_name,
                     tableTitle=table_title,
                     save_name=save_name)

    save_name = 'TJ-XJ-DATASHOW-' + time_year_month_day + '.xlsx'
    sheet_name = 'XXX总体情况'
    sheet_name2 = 'XXX概况'
    index_name = '信息分析'
    values_name = '涉及数据量大小(G)'
    department_name = '业务XX名称'
    new_department_name = 'XX'
    personnel_name = '人员'
    crate_shuju_toushi(save_name=save_name,
                       sheet_name=sheet_name,
                       sheet_name2=sheet_name2,
                       index_name=index_name,
                       values_name=values_name,
                       department_name=department_name,
                       new_department_name=new_department_name,
                       personnel_name=personnel_name)

    index_name = ['信息分析','业务XX利润编号']
    save_name = 'TJ-XJ-DATASHOW-' + time_year_month_day + '.xlsx'
    sheet_name = 'XXX总体情况'
    department_num = 'XX编号'
    data_total = '数据量(G)'
    values_name = '涉及数据量大小(G)'
    sheet_name2 = '未匹配人员数据量'
    sum_name = 'ALL'
    create_space_toushi(index_name=index_name,
                        save_name=save_name,
                        sheet_name=sheet_name,
                        department_num=department_num,
                        data_total=data_total,
                        values_name=values_name,
                        sheet_name2=sheet_name2,
                        sum_name=sum_name,)

完成数据透视,

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import pandas as pd
import openpyxl as xl

import datetime
difference_cycle=33
time_year_month_day = str(datetime.date.today()-datetime.timedelta(days=difference_cycle))
def add_last_excel(old_sheet,new_sheet,gaikuang):
    pd01 = pd.read_excel(old_sheet,sheet_name=gaikuang,encoding='utf-8')
    pd02 = pd.read_excel(new_sheet,sheet_name=gaikuang,encoding='utf-8')

    pd11 = pd.read_excel(old_sheet,sheet_name='未匹配人员数据量',encoding='utf-8')
    pd12 = pd.read_excel(new_sheet,sheet_name='未匹配人员数据量',encoding='utf-8')


    result = pd.merge(pd01,pd02[['人员','涉及数据量大小(G)']],on='人员')
    result_list = list(zip(list((result['XXX'])),list((result['人员'])),list((result['涉及数据量大小(G)_x'])),list((result['涉及数据量大小(G)_y']))))

    not_match = pd.merge(pd11,pd12[['XXX编号','数据量(G)']],left_on='XXX编号',right_on='XXX编号')
    print(not_match)
    not_match_list = list(zip(list((not_match['XXX编号'])),list((not_match['数据量(G)_x'])),list((not_match['数据量(G)_y']))))

    wb2 = xl.load_workbook(new_sheet)
    remove_sheet1 = wb2[gaikuang]
    remove_sheet2 = wb2['未匹配人员数据量']
    wb2.remove(remove_sheet1)
    wb2.remove(remove_sheet2)
    wb2.save(new_sheet)

    wb2 = xl.load_workbook(new_sheet)
    sheet21 = wb2.create_sheet(gaikuang,0)
    sheet22 = wb2.create_sheet('未匹配人员数据量')

    result_head = ['XXX','人员','涉及数据量(G)','第二次涉及数据量(G)','任务额差']
    result_list.insert(0,result_head)
    result_for_num = 1
    for i in result_list:
        result_i = list(i)
        if result_for_num != 1:
            result_chae=i[2]-i[3]-i[2]*0.2
            result_i.append(float('%.2f'% result_chae))
        result_for_num = result_for_num + 1
        sheet21.append(result_i)

    not_match_head = ['XXX编号','数据量(G)','第二次数据量(G)','任务额差']
    not_match_list.insert(0,not_match_head)
    not_match_for_num = 1
    for j in  not_match_list:
        not_match_j = list(j)
        if not_match_for_num != 1:
            not_match_chae=j[1]-j[2]-j[1]*0.2
            not_match_j.append(float('%.2f'% not_match_chae))
        not_match_for_num = not_match_for_num + 1
        sheet22.append(not_match_j)
    wb2.save(new_sheet)

if __name__ == '__main__':
    old_sheet='TJ-XJ-DATASHOW-' + time_year_month_day + '-old.xlsx'
    new_sheet='TJ-XJ-DATASHOW-' + time_year_month_day + '.xlsx'
    gaikuang='XXX概况'
    add_last_excel(old_sheet=old_sheet,new_sheet=new_sheet,gaikuang=gaikuang)

评论