Python-100-Days/Day76-90/code/6-pandas数据集成.ipynb

1210 lines
32 KiB
Plaintext
Raw Normal View History

2019-05-16 11:59:06 +08:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"import pandas as pd\n",
"from pandas import Series,DataFrame"
]
},
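{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Data analysis and data mining\n",
"# Once the data is available:\n",
"# Data preprocessing\n",
"# Data cleaning (missing values, outliers)\n",
"# Data integration (merging/concatenating several datasets; the data may be spread across multiple tables)\n",
"# Data transformation\n",
"# Data reduction (fewer attributes: drop the unimportant ones; fewer rows: remove duplicates)"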
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 5, 12, 67, 29, 46, 103, 53, 53, 139, 87],\n",
" [126, 33, 55, 104, 45, 70, 96, 133, 116, 43],\n",
" [ 84, 45, 17, 42, 19, 11, 125, 43, 54, 39],\n",
" [ 97, 68, 99, 90, 28, 60, 135, 84, 111, 63],\n",
" [114, 56, 30, 81, 48, 73, 119, 65, 20, 22]])"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"array([[115, 128, 122, 127, 4, 135, 26, 25, 131, 139],\n",
" [ 66, 119, 37, 136, 101, 40, 102, 127, 148, 127],\n",
" [ 89, 80, 140, 133, 51, 142, 47, 27, 54, 23],\n",
" [ 64, 127, 33, 128, 60, 106, 67, 94, 110, 76],\n",
" [ 6, 21, 23, 96, 10, 62, 26, 79, 149, 43],\n",
" [116, 143, 132, 118, 68, 21, 57, 133, 124, 124]])"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# 首先看numpy数组的集成\n",
"nd1 = np.random.randint(0,150,size = (5,10))\n",
"\n",
"nd2 = np.random.randint(0,150,size = (6,10))\n",
"display(nd1,nd2)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 5, 12, 67, 29, 46, 103, 53, 53, 139, 87],\n",
" [126, 33, 55, 104, 45, 70, 96, 133, 116, 43],\n",
" [ 84, 45, 17, 42, 19, 11, 125, 43, 54, 39],\n",
" [ 97, 68, 99, 90, 28, 60, 135, 84, 111, 63],\n",
" [114, 56, 30, 81, 48, 73, 119, 65, 20, 22],\n",
" [115, 128, 122, 127, 4, 135, 26, 25, 131, 139],\n",
" [ 66, 119, 37, 136, 101, 40, 102, 127, 148, 127],\n",
" [ 89, 80, 140, 133, 51, 142, 47, 27, 54, 23],\n",
" [ 64, 127, 33, 128, 60, 106, 67, 94, 110, 76],\n",
" [ 6, 21, 23, 96, 10, 62, 26, 79, 149, 43],\n",
" [116, 143, 132, 118, 68, 21, 57, 133, 124, 124]])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 原来数据一个5行一个是6行级联之后变成了11行\n",
"nd3 = np.concatenate([nd1,nd2],axis = 0)\n",
"nd3"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"array([[110, 38, 144, 92, 38, 2, 67, 2, 103, 81],\n",
" [ 56, 61, 61, 22, 108, 145, 95, 44, 40, 100],\n",
" [ 65, 74, 85, 123, 47, 117, 35, 55, 120, 20],\n",
" [ 15, 9, 4, 84, 71, 133, 140, 13, 71, 91],\n",
" [ 94, 31, 41, 5, 7, 32, 50, 24, 18, 120]])"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"array([[ 65, 149, 86, 138, 98],\n",
" [136, 49, 102, 45, 140],\n",
" [ 13, 124, 94, 81, 73],\n",
" [ 82, 38, 0, 75, 94],\n",
" [146, 28, 143, 61, 49]])"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"nd1 = np.random.randint(0,150,size = (5,10))\n",
"\n",
"nd2 = np.random.randint(0,150,size = (5,5))\n",
"display(nd1,nd2)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[110, 38, 144, 92, 38, 2, 67, 2, 103, 81, 65, 149, 86,\n",
" 138, 98],\n",
" [ 56, 61, 61, 22, 108, 145, 95, 44, 40, 100, 136, 49, 102,\n",
" 45, 140],\n",
" [ 65, 74, 85, 123, 47, 117, 35, 55, 120, 20, 13, 124, 94,\n",
" 81, 73],\n",
" [ 15, 9, 4, 84, 71, 133, 140, 13, 71, 91, 82, 38, 0,\n",
" 75, 94],\n",
" [ 94, 31, 41, 5, 7, 32, 50, 24, 18, 120, 146, 28, 143,\n",
" 61, 49]])"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# axis = 0行级联第一维度的级联axis = 1第二个维度的级联列的级联\n",
"np.concatenate((nd1,nd2),axis = 1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Concatenation in pandas: pandas is built on top of NumPy,\n",
"# so concatenating pandas objects works in a similar way."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>113</td>\n",
" <td>53</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>135</td>\n",
" <td>40</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>144</td>\n",
" <td>18</td>\n",
" <td>64</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 113 53 80\n",
"B 135 40 52\n",
"C 144 18 64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>126</td>\n",
" <td>118</td>\n",
" <td>146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>147</td>\n",
" <td>81</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>87</td>\n",
" <td>63</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>35</td>\n",
" <td>95</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>H</th>\n",
" <td>130</td>\n",
" <td>117</td>\n",
" <td>91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>I</th>\n",
" <td>124</td>\n",
" <td>98</td>\n",
" <td>122</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"D 126 118 146\n",
"E 147 81 27\n",
"F 87 63 1\n",
"G 35 95 33\n",
"H 130 117 91\n",
"I 124 98 122"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"df1 = DataFrame(np.random.randint(0,150,size = (3,3)),index = list('ABC'),columns=['Python','Math','En'])\n",
"\n",
"df2 = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('DEFGHI'),columns=['Python','Math','En'])\n",
"\n",
"display(df1,df2)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>113</td>\n",
" <td>53</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>135</td>\n",
" <td>40</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>144</td>\n",
" <td>18</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>126</td>\n",
" <td>118</td>\n",
" <td>146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>147</td>\n",
" <td>81</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>87</td>\n",
" <td>63</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>35</td>\n",
" <td>95</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>H</th>\n",
" <td>130</td>\n",
" <td>117</td>\n",
" <td>91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>I</th>\n",
" <td>124</td>\n",
" <td>98</td>\n",
" <td>122</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 113 53 80\n",
"B 135 40 52\n",
"C 144 18 64\n",
"D 126 118 146\n",
"E 147 81 27\n",
"F 87 63 1\n",
"G 35 95 33\n",
"H 130 117 91\n",
"I 124 98 122"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# pandas汇总数据数据集成\n",
"df1.append(df2)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>113</td>\n",
" <td>53</td>\n",
" <td>80</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>135</td>\n",
" <td>40</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>144</td>\n",
" <td>18</td>\n",
" <td>64</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>126</td>\n",
" <td>118</td>\n",
" <td>146</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>147</td>\n",
" <td>81</td>\n",
" <td>27</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>87</td>\n",
" <td>63</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>35</td>\n",
" <td>95</td>\n",
" <td>33</td>\n",
" </tr>\n",
" <tr>\n",
" <th>H</th>\n",
" <td>130</td>\n",
" <td>117</td>\n",
" <td>91</td>\n",
" </tr>\n",
" <tr>\n",
" <th>I</th>\n",
" <td>124</td>\n",
" <td>98</td>\n",
" <td>122</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 113 53 80\n",
"B 135 40 52\n",
"C 144 18 64\n",
"D 126 118 146\n",
"E 147 81 27\n",
"F 87 63 1\n",
"G 35 95 33\n",
"H 130 117 91\n",
"I 124 98 122"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat([df1,df2])"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": true
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\python36\\lib\\site-packages\\ipykernel_launcher.py:1: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
"of pandas will change to not sort by default.\n",
"\n",
"To accept the future behavior, pass 'sort=False'.\n",
"\n",
"To retain the current behavior and silence the warning, pass 'sort=True'.\n",
"\n",
" \"\"\"Entry point for launching an IPython kernel.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>113.0</td>\n",
" <td>53.0</td>\n",
" <td>80.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>135.0</td>\n",
" <td>40.0</td>\n",
" <td>52.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>144.0</td>\n",
" <td>18.0</td>\n",
" <td>64.0</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>D</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>126.0</td>\n",
" <td>118.0</td>\n",
" <td>146.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>147.0</td>\n",
" <td>81.0</td>\n",
" <td>27.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>87.0</td>\n",
" <td>63.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>35.0</td>\n",
" <td>95.0</td>\n",
" <td>33.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>H</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>130.0</td>\n",
" <td>117.0</td>\n",
" <td>91.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>I</th>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>124.0</td>\n",
" <td>98.0</td>\n",
" <td>122.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En Python Math En\n",
"A 113.0 53.0 80.0 NaN NaN NaN\n",
"B 135.0 40.0 52.0 NaN NaN NaN\n",
"C 144.0 18.0 64.0 NaN NaN NaN\n",
"D NaN NaN NaN 126.0 118.0 146.0\n",
"E NaN NaN NaN 147.0 81.0 27.0\n",
"F NaN NaN NaN 87.0 63.0 1.0\n",
"G NaN NaN NaN 35.0 95.0 33.0\n",
"H NaN NaN NaN 130.0 117.0 91.0\n",
"I NaN NaN NaN 124.0 98.0 122.0"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat([df1,df2],axis = 1,ignore_index = False)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>22</td>\n",
" <td>58</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>99</td>\n",
" <td>57</td>\n",
" <td>35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>51</td>\n",
" <td>28</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>5</td>\n",
" <td>60</td>\n",
" <td>111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>137</td>\n",
" <td>23</td>\n",
" <td>121</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>49</td>\n",
" <td>78</td>\n",
" <td>115</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 22 58 13\n",
"B 99 57 35\n",
"C 51 28 24\n",
"E 5 60 111\n",
"F 137 23 121\n",
"G 49 78 115"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>118</td>\n",
" <td>113</td>\n",
" <td>81</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>51</td>\n",
" <td>22</td>\n",
" <td>126</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0</td>\n",
" <td>115</td>\n",
" <td>128</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>100</td>\n",
" <td>130</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>49</td>\n",
" <td>93</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>70</td>\n",
" <td>59</td>\n",
" <td>94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 118 113 81\n",
"B 51 22 126\n",
"C 0 115 128\n",
"E 100 130 94\n",
"F 49 93 140\n",
"G 70 59 94"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# 期中\n",
"df1 = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('ABCEFG'),columns=['Python','Math','En'])\n",
"\n",
"# 期末\n",
"df2 = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('ABCEFG'),columns=['Python','Math','En'])\n",
"\n",
"display(df1,df2)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">期中</th>\n",
" <th>A</th>\n",
" <td>22</td>\n",
" <td>58</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>99</td>\n",
" <td>57</td>\n",
" <td>35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>51</td>\n",
" <td>28</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>5</td>\n",
" <td>60</td>\n",
" <td>111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>137</td>\n",
" <td>23</td>\n",
" <td>121</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>49</td>\n",
" <td>78</td>\n",
" <td>115</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"6\" valign=\"top\">期末</th>\n",
" <th>A</th>\n",
" <td>118</td>\n",
" <td>113</td>\n",
" <td>81</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>51</td>\n",
" <td>22</td>\n",
" <td>126</td>\n",
" </tr>\n",
" <tr>\n",
" <th>C</th>\n",
" <td>0</td>\n",
" <td>115</td>\n",
" <td>128</td>\n",
" </tr>\n",
" <tr>\n",
" <th>E</th>\n",
" <td>100</td>\n",
" <td>130</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th>F</th>\n",
" <td>49</td>\n",
" <td>93</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th>G</th>\n",
" <td>70</td>\n",
" <td>59</td>\n",
" <td>94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"期中 A 22 58 13\n",
" B 99 57 35\n",
" C 51 28 24\n",
" E 5 60 111\n",
" F 137 23 121\n",
" G 49 78 115\n",
"期末 A 118 113 81\n",
" B 51 22 126\n",
" C 0 115 128\n",
" E 100 130 94\n",
" F 49 93 140\n",
" G 70 59 94"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3 = pd.concat([df1,df2],axis = 0,keys = ['期中','期末'])\n",
"df3"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>Math</th>\n",
" <th>En</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">A</th>\n",
" <th>期中</th>\n",
" <td>22</td>\n",
" <td>58</td>\n",
" <td>13</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>118</td>\n",
" <td>113</td>\n",
" <td>81</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">B</th>\n",
" <th>期中</th>\n",
" <td>99</td>\n",
" <td>57</td>\n",
" <td>35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>51</td>\n",
" <td>22</td>\n",
" <td>126</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">C</th>\n",
" <th>期中</th>\n",
" <td>51</td>\n",
" <td>28</td>\n",
" <td>24</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>0</td>\n",
" <td>115</td>\n",
" <td>128</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">E</th>\n",
" <th>期中</th>\n",
" <td>5</td>\n",
" <td>60</td>\n",
" <td>111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>100</td>\n",
" <td>130</td>\n",
" <td>94</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">F</th>\n",
" <th>期中</th>\n",
" <td>137</td>\n",
" <td>23</td>\n",
" <td>121</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>49</td>\n",
" <td>93</td>\n",
" <td>140</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">G</th>\n",
" <th>期中</th>\n",
" <td>49</td>\n",
" <td>78</td>\n",
" <td>115</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>70</td>\n",
" <td>59</td>\n",
" <td>94</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python Math En\n",
"A 期中 22 58 13\n",
" 期末 118 113 81\n",
"B 期中 99 57 35\n",
" 期末 51 22 126\n",
"C 期中 51 28 24\n",
" 期末 0 115 128\n",
"E 期中 5 60 111\n",
" 期末 100 130 94\n",
"F 期中 137 23 121\n",
" 期末 49 93 140\n",
"G 期中 49 78 115\n",
" 期末 70 59 94"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3.unstack(level = 0).stack()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}