Python-100-Days/Day76-90/code/4-pandas多层索引.ipynb

569 lines
14 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"\n",
"import pandas as pd\n",
"# 数据分析BI-------->人工智能AI\n",
"# 数据分析和数据挖掘一个意思,\n",
"# 工具和软件Excel 免费版\n",
"# SPSS一人一年10000、SAS一人一年5000、Matlab 收费\n",
"# R、Python全方位语言流行 免费\n",
"# Python + numpy + scipy + pandas + matplotlib + seaborn + pyEcharts + sklearn + kereasTensorflow+…… \n",
"# 代码,自动化(数据输入----输出结果)\n",
"from pandas import Series,DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/plain": [
"a 63\n",
"b 107\n",
"c 16\n",
"d 35\n",
"e 140\n",
"f 83\n",
"dtype: int32"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 多层索引,行列\n",
"# 单层索引\n",
"s = Series(np.random.randint(0,150,size = 6),index=list('abcdef'))\n",
"s"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"张三 期中 114\n",
" 期末 131\n",
"李四 期中 3\n",
" 期末 63\n",
"王五 期中 107\n",
" 期末 34\n",
"dtype: int32"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 多层索引,两层,三层以上(规则一样)\n",
"s2 = Series(np.random.randint(0,150,size = 6),index = pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末']]))\n",
"s2"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>Math</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">张三</th>\n",
" <th>期中</th>\n",
" <td>73</td>\n",
" <td>5</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>37</td>\n",
" <td>36</td>\n",
" <td>56</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">李四</th>\n",
" <th>期中</th>\n",
" <td>149</td>\n",
" <td>81</td>\n",
" <td>142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>71</td>\n",
" <td>138</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">王五</th>\n",
" <th>期中</th>\n",
" <td>11</td>\n",
" <td>94</td>\n",
" <td>103</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>25</td>\n",
" <td>121</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En Math\n",
"张三 期中 73 5 25\n",
" 期末 37 36 56\n",
"李四 期中 149 81 142\n",
" 期末 71 138 0\n",
"王五 期中 11 94 103\n",
" 期末 25 121 83"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = DataFrame(np.random.randint(0,150,size = (6,3)),columns=['Python','En','Math'],index =pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末']]) )\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>Math</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">张三</th>\n",
" <th rowspan=\"2\" valign=\"top\">期中</th>\n",
" <th>A</th>\n",
" <td>15</td>\n",
" <td>31</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>82</td>\n",
" <td>56</td>\n",
" <td>123</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">期末</th>\n",
" <th>A</th>\n",
" <td>14</td>\n",
" <td>2</td>\n",
" <td>78</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>69</td>\n",
" <td>50</td>\n",
" <td>17</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">李四</th>\n",
" <th rowspan=\"2\" valign=\"top\">期中</th>\n",
" <th>A</th>\n",
" <td>91</td>\n",
" <td>87</td>\n",
" <td>143</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>120</td>\n",
" <td>118</td>\n",
" <td>39</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">期末</th>\n",
" <th>A</th>\n",
" <td>56</td>\n",
" <td>76</td>\n",
" <td>55</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>11</td>\n",
" <td>105</td>\n",
" <td>121</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"4\" valign=\"top\">王五</th>\n",
" <th rowspan=\"2\" valign=\"top\">期中</th>\n",
" <th>A</th>\n",
" <td>147</td>\n",
" <td>78</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>128</td>\n",
" <td>126</td>\n",
" <td>146</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">期末</th>\n",
" <th>A</th>\n",
" <td>49</td>\n",
" <td>45</td>\n",
" <td>114</td>\n",
" </tr>\n",
" <tr>\n",
" <th>B</th>\n",
" <td>121</td>\n",
" <td>26</td>\n",
" <td>77</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En Math\n",
"张三 期中 A 15 31 17\n",
" B 82 56 123\n",
" 期末 A 14 2 78\n",
" B 69 50 17\n",
"李四 期中 A 91 87 143\n",
" B 120 118 39\n",
" 期末 A 56 76 55\n",
" B 11 105 121\n",
"王五 期中 A 147 78 1\n",
" B 128 126 146\n",
" 期末 A 49 45 114\n",
" B 121 26 77"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 三层索引\n",
"df3 = DataFrame(np.random.randint(0,150,size = (12,3)),columns=['Python','En','Math'],index =pd.MultiIndex.from_product([['张三','李四','王五'],['期中','期末'],['A','B']]) )\n",
"\n",
"df3"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"73"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 先获取列后获取行\n",
"df['Python']['张三']['期中']"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"df2 = df.copy()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>Math</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">张三</th>\n",
" <th>期中</th>\n",
" <td>73</td>\n",
" <td>5</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>37</td>\n",
" <td>36</td>\n",
" <td>56</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">李四</th>\n",
" <th>期中</th>\n",
" <td>149</td>\n",
" <td>81</td>\n",
" <td>142</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>71</td>\n",
" <td>138</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">王五</th>\n",
" <th>期中</th>\n",
" <td>11</td>\n",
" <td>94</td>\n",
" <td>103</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>25</td>\n",
" <td>121</td>\n",
" <td>83</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En Math\n",
"张三 期中 73 5 25\n",
" 期末 37 36 56\n",
"李四 期中 149 81 142\n",
" 期末 71 138 0\n",
"王五 期中 11 94 103\n",
" 期末 25 121 83"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2.sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"73"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# 先获取行,后获取列\n",
"df.loc['张三'].loc['期中']['Python']"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th></th>\n",
" <th>Python</th>\n",
" <th>En</th>\n",
" <th>Math</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"2\" valign=\"top\">张三</th>\n",
" <th>期中</th>\n",
" <td>73</td>\n",
" <td>5</td>\n",
" <td>25</td>\n",
" </tr>\n",
" <tr>\n",
" <th>期末</th>\n",
" <td>37</td>\n",
" <td>36</td>\n",
" <td>56</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Python En Math\n",
"张三 期中 73 5 25\n",
" 期末 37 36 56"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.iloc[[0,1]]"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}