Python-100-Days/Day76-90/code/6-pandas数据集成.ipynb

1210 lines
32 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"import pandas as pd\n",
"from pandas import Series,DataFrame"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 数据分析数据挖掘\n",
"# 有数据情况下:\n",
"# 数据预处理\n",
"# 数据清洗(空数据,异常值)\n",
"# 数据集成(多个数据合并到一起,级联)数据可能存放在多个表中\n",
"# 数据转化\n",
"# 数据规约(属性减少(不重要的属性删除),数据减少去重操作)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 5, 12, 67, 29, 46, 103, 53, 53, 139, 87],\n",
" [126, 33, 55, 104, 45, 70, 96, 133, 116, 43],\n",
" [ 84, 45, 17, 42, 19, 11, 125, 43, 54, 39],\n",
" [ 97, 68, 99, 90, 28, 60, 135, 84, 111, 63],\n",
" [114, 56, 30, 81, 48, 73, 119, 65, 20, 22]])"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"array([[115, 128, 122, 127, 4, 135, 26, 25, 131, 139],\n",
" [ 66, 119, 37, 136, 101, 40, 102, 127, 148, 127],\n",
" [ 89, 80, 140, 133, 51, 142, 47, 27, 54, 23],\n",
" [ 64, 127, 33, 128, 60, 106, 67, 94, 110, 76],\n",
" [ 6, 21, 23, 96, 10, 62, 26, 79, 149, 43],\n",
" [116, 143, 132, 118, 68, 21, 57, 133, 124, 124]])"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# 首先看numpy数组的集成\n",
"nd1 = np.random.randint(0,150,size = (5,10))\n",
"\n",
"nd2 = np.random.randint(0,150,size = (6,10))\n",
"display(nd1,nd2)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 5, 12, 67, 29, 46, 103, 53, 53, 139, 87],\n",
" [126, 33, 55, 104, 45, 70, 96, 133, 116, 43],\n",
" [ 84, 45, 17, 42, 19, 11, 125, 43, 54, 39],\n",
" [ 97, 68, 99, 90, 28, 60, 135, 84, 111, 63],\n",
" [114, 56, 30, 81, 48, 73, 119, 65, 20, 22],\n",
" [115, 128, 122, 127, 4, 135, 26, 25, 131, 139],\n",
" [ 66, 119, 37, 136, 101, 40, 102, 127, 148, 127],\n",
" [ 89, 80, 140, 133, 51, 142, 47, 27, 54, 23],\n",
" [ 64, 127, 33, 128, 60, 106, 67, 94, 110, 76],\n",
" [ 6, 21, 23, 96, 10, 62, 26, 79, 149, 43],\n",
" [116, 143, 132, 118, 68, 21, 57, 133, 124, 124]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 原来数据一个5行一个是6行级联之后变成了11行\n",
"nd3 = np.concatenate([nd1,nd2],axis = 0)\n",
"nd3"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[110, 38, 144, 92, 38, 2, 67, 2, 103, 81],\n",
" [ 56, 61, 61, 22, 108, 145, 95, 44, 40, 100],\n",
" [ 65, 74, 85, 123, 47, 117, 35, 55, 120, 20],\n",
" [ 15, 9, 4, 84, 71, 133, 140, 13, 71, 91],\n",
" [ 94, 31, 41, 5, 7, 32, 50, 24, 18, 120]])"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"array([[ 65, 149, 86, 138, 98],\n",
" [136, 49, 102, 45, 140],\n",
" [ 13, 124, 94, 81, 73],\n",
" [ 82, 38, 0, 75, 94],\n",
" [146, 28, 143, 61, 49]])"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"nd1 = np.random.randint(0,150,size = (5,10))\n",
"\n",
"nd2 = np.random.randint(0,150,size = (5,5))\n",
"display(nd1,nd2)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[110, 38, 144, 92, 38, 2, 67, 2, 103, 81, 65, 149, 86,\n",
" 138, 98],\n",
" [ 56, 61, 61, 22, 108, 145, 95, 44, 40, 100, 136, 49, 102,\n",
" 45, 140],\n",
" [ 65, 74, 85, 123, 47, 117, 35, 55, 120, 20, 13, 124, 94,\n",
" 81, 73],\n",
" [ 15, 9, 4, 84, 71, 133, 140, 13, 71, 91, 82, 38, 0,\n",
" 75, 94],\n",
" [ 94, 31, 41, 5, 7, 32, 50, 24, 18, 120, 146, 28, 143,\n",
" 61, 49]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# axis = 0行级联第一维度的级联axis = 1第二个维度的级联列的级联\n",
"np.concatenate((nd1,nd2),axis = 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# pandas级联操作pandas基于numpy\n",
"# pandas的级联类似"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>113</td>\n",
" <td>53</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>135</td>\n",
" <td>40</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>144</td>\n",
" <td>18</td>\n",
" <td>64</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 113 53 80\n",
"B 135 40 52\n",
"C 144 18 64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>126</td>\n",
" <td>118</td>\n",
" <td>146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>147</td>\n",
" <td>81</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>87</td>\n",
" <td>63</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>35</td>\n",
" <td>95</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>H</th>\n",
" <td>130</td>\n",
" <td>117</td>\n",
" <td>91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>I</th>\n",
" <td>124</td>\n",
" <td>98</td>\n",
" <td>122</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"D 126 118 146\n",
"E 147 81 27\n",
"F 87 63 1\n",
"G 35 95 33\n",
"H 130 117 91\n",
"I 124 98 122"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df1 = DataFrame(np.random.randint(0,150,size = (3,3)),index = list('ABC'),columns=['Python','Math','En'])\n",
"\n",
"df2 = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('DEFGHI'),columns=['Python','Math','En'])\n",
"\n",
"display(df1,df2)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>113</td>\n",
" <td>53</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>135</td>\n",
" <td>40</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>144</td>\n",
" <td>18</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>126</td>\n",
" <td>118</td>\n",
" <td>146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>147</td>\n",
" <td>81</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>87</td>\n",
" <td>63</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>35</td>\n",
" <td>95</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>H</th>\n",
" <td>130</td>\n",
" <td>117</td>\n",
" <td>91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>I</th>\n",
" <td>124</td>\n",
" <td>98</td>\n",
" <td>122</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 113 53 80\n",
"B 135 40 52\n",
"C 144 18 64\n",
"D 126 118 146\n",
"E 147 81 27\n",
"F 87 63 1\n",
"G 35 95 33\n",
"H 130 117 91\n",
"I 124 98 122"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pandas汇总数据数据集成\n",
"df1.append(df2)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>113</td>\n",
" <td>53</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>135</td>\n",
" <td>40</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>144</td>\n",
" <td>18</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>126</td>\n",
" <td>118</td>\n",
" <td>146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>147</td>\n",
" <td>81</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>87</td>\n",
" <td>63</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>35</td>\n",
" <td>95</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>H</th>\n",
" <td>130</td>\n",
" <td>117</td>\n",
" <td>91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>I</th>\n",
" <td>124</td>\n",
" <td>98</td>\n",
" <td>122</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 113 53 80\n",
"B 135 40 52\n",
"C 144 18 64\n",
"D 126 118 146\n",
"E 147 81 27\n",
"F 87 63 1\n",
"G 35 95 33\n",
"H 130 117 91\n",
"I 124 98 122"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat([df1,df2])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\python36\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
"of pandas will change to not sort by default.\n",
"\n",
"To accept the future behavior, pass 'sort=False'.\n",
"\n",
"To retain the current behavior and silence the warning, pass 'sort=True'.\n",
"\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>113.0</td>\n",
" <td>53.0</td>\n",
" <td>80.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>135.0</td>\n",
" <td>40.0</td>\n",
" <td>52.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>144.0</td>\n",
" <td>18.0</td>\n",
" <td>64.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>126.0</td>\n",
" <td>118.0</td>\n",
" <td>146.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>147.0</td>\n",
" <td>81.0</td>\n",
" <td>27.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>87.0</td>\n",
" <td>63.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>35.0</td>\n",
" <td>95.0</td>\n",
" <td>33.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>H</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>130.0</td>\n",
" <td>117.0</td>\n",
" <td>91.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>I</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>124.0</td>\n",
" <td>98.0</td>\n",
" <td>122.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En Python Math En\n",
"A 113.0 53.0 80.0 NaN NaN NaN\n",
"B 135.0 40.0 52.0 NaN NaN NaN\n",
"C 144.0 18.0 64.0 NaN NaN NaN\n",
"D NaN NaN NaN 126.0 118.0 146.0\n",
"E NaN NaN NaN 147.0 81.0 27.0\n",
"F NaN NaN NaN 87.0 63.0 1.0\n",
"G NaN NaN NaN 35.0 95.0 33.0\n",
"H NaN NaN NaN 130.0 117.0 91.0\n",
"I NaN NaN NaN 124.0 98.0 122.0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat([df1,df2],axis = 1,ignore_index = False)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>22</td>\n",
" <td>58</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>99</td>\n",
" <td>57</td>\n",
" <td>35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>51</td>\n",
" <td>28</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>5</td>\n",
" <td>60</td>\n",
" <td>111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>137</td>\n",
" <td>23</td>\n",
" <td>121</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>49</td>\n",
" <td>78</td>\n",
" <td>115</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 22 58 13\n",
"B 99 57 35\n",
"C 51 28 24\n",
"E 5 60 111\n",
"F 137 23 121\n",
"G 49 78 115"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>118</td>\n",
" <td>113</td>\n",
" <td>81</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>51</td>\n",
" <td>22</td>\n",
" <td>126</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0</td>\n",
" <td>115</td>\n",
" <td>128</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>100</td>\n",
" <td>130</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>49</td>\n",
" <td>93</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>70</td>\n",
" <td>59</td>\n",
" <td>94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 118 113 81\n",
"B 51 22 126\n",
"C 0 115 128\n",
"E 100 130 94\n",
"F 49 93 140\n",
"G 70 59 94"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# 期中\n",
"df1 = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('ABCEFG'),columns=['Python','Math','En'])\n",
"\n",
"# 期末\n",
"df2 = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('ABCEFG'),columns=['Python','Math','En'])\n",
"\n",
"display(df1,df2)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">期中</th>\n",
" <th>A</th>\n",
" <td>22</td>\n",
" <td>58</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>99</td>\n",
" <td>57</td>\n",
" <td>35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>51</td>\n",
" <td>28</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>5</td>\n",
" <td>60</td>\n",
" <td>111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>137</td>\n",
" <td>23</td>\n",
" <td>121</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>49</td>\n",
" <td>78</td>\n",
" <td>115</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">期末</th>\n",
" <th>A</th>\n",
" <td>118</td>\n",
" <td>113</td>\n",
" <td>81</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>51</td>\n",
" <td>22</td>\n",
" <td>126</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0</td>\n",
" <td>115</td>\n",
" <td>128</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>100</td>\n",
" <td>130</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>49</td>\n",
" <td>93</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>70</td>\n",
" <td>59</td>\n",
" <td>94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"期中 A 22 58 13\n",
" B 99 57 35\n",
" C 51 28 24\n",
" E 5 60 111\n",
" F 137 23 121\n",
" G 49 78 115\n",
"期末 A 118 113 81\n",
" B 51 22 126\n",
" C 0 115 128\n",
" E 100 130 94\n",
" F 49 93 140\n",
" G 70 59 94"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3 = pd.concat([df1,df2],axis = 0,keys = ['期中','期末'])\n",
"df3"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">A</th>\n",
" <th>期中</th>\n",
" <td>22</td>\n",
" <td>58</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>118</td>\n",
" <td>113</td>\n",
" <td>81</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">B</th>\n",
" <th>期中</th>\n",
" <td>99</td>\n",
" <td>57</td>\n",
" <td>35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>51</td>\n",
" <td>22</td>\n",
" <td>126</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">C</th>\n",
" <th>期中</th>\n",
" <td>51</td>\n",
" <td>28</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>0</td>\n",
" <td>115</td>\n",
" <td>128</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">E</th>\n",
" <th>期中</th>\n",
" <td>5</td>\n",
" <td>60</td>\n",
" <td>111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>100</td>\n",
" <td>130</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">F</th>\n",
" <th>期中</th>\n",
" <td>137</td>\n",
" <td>23</td>\n",
" <td>121</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>49</td>\n",
" <td>93</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">G</th>\n",
" <th>期中</th>\n",
" <td>49</td>\n",
" <td>78</td>\n",
" <td>115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>70</td>\n",
" <td>59</td>\n",
" <td>94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 期中 22 58 13\n",
" 期末 118 113 81\n",
"B 期中 99 57 35\n",
" 期末 51 22 126\n",
"C 期中 51 28 24\n",
" 期末 0 115 128\n",
"E 期中 5 60 111\n",
" 期末 100 130 94\n",
"F 期中 137 23 121\n",
" 期末 49 93 140\n",
"G 期中 49 78 115\n",
" 期末 70 59 94"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3.unstack(level = 0).stack()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}