1273 lines
32 KiB
Plaintext
1273 lines
32 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import numpy as np\n",
|
||
"\n",
|
||
"import pandas as pd\n",
|
||
"from pandas import Series,DataFrame"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# 上一讲,append,concat数据集成方法\n",
|
||
"# merge融合,根据某一共同属性进行级联,高级用法"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>sex</th>\n",
|
||
" <th>id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>A</td>\n",
|
||
" <td>男</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>B</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>C</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>E</td>\n",
|
||
" <td>男</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>F</td>\n",
|
||
" <td>男</td>\n",
|
||
" <td>6</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" name sex id\n",
|
||
"0 A 男 1\n",
|
||
"1 B 女 2\n",
|
||
"2 C 女 3\n",
|
||
"3 D 女 4\n",
|
||
"4 E 男 5\n",
|
||
"5 F 男 6"
|
||
]
|
||
},
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df1 = DataFrame({'name':['A','B','C','D','E','F'],\n",
|
||
" 'sex':['男','女','女','女','男','男'],\n",
|
||
" 'id':[1,2,3,4,5,6]})\n",
|
||
"df1"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>age</th>\n",
|
||
" <th>salary</th>\n",
|
||
" <th>id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>22</td>\n",
|
||
" <td>12000</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>25</td>\n",
|
||
" <td>15000</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>27</td>\n",
|
||
" <td>20000</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>21</td>\n",
|
||
" <td>30000</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>18</td>\n",
|
||
" <td>10000</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>29</td>\n",
|
||
" <td>8000</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" age salary id\n",
|
||
"0 22 12000 1\n",
|
||
"1 25 15000 2\n",
|
||
"2 27 20000 3\n",
|
||
"3 21 30000 4\n",
|
||
"4 18 10000 5\n",
|
||
"5 29 8000 7"
|
||
]
|
||
},
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df2 = DataFrame({'age':[22,25,27,21,18,29],'salary':[12000,15000,20000,30000,10000,8000],'id':[1,2,3,4,5,7]})\n",
|
||
"df2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stderr",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"d:\\python36\\lib\\site-packages\\pandas\\core\\frame.py:6692: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
|
||
"of pandas will change to not sort by default.\n",
|
||
"\n",
|
||
"To accept the future behavior, pass 'sort=False'.\n",
|
||
"\n",
|
||
"To retain the current behavior and silence the warning, pass 'sort=True'.\n",
|
||
"\n",
|
||
" sort=sort)\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>age</th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>salary</th>\n",
|
||
" <th>sex</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>A</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>男</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>B</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>女</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>C</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>女</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>D</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>女</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>E</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>男</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>F</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>男</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>22.0</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>12000.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>25.0</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>15000.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>27.0</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>20000.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>21.0</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>30000.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>18.0</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>10000.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>29.0</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>8000.0</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" age id name salary sex\n",
|
||
"0 NaN 1 A NaN 男\n",
|
||
"1 NaN 2 B NaN 女\n",
|
||
"2 NaN 3 C NaN 女\n",
|
||
"3 NaN 4 D NaN 女\n",
|
||
"4 NaN 5 E NaN 男\n",
|
||
"5 NaN 6 F NaN 男\n",
|
||
"0 22.0 1 NaN 12000.0 NaN\n",
|
||
"1 25.0 2 NaN 15000.0 NaN\n",
|
||
"2 27.0 3 NaN 20000.0 NaN\n",
|
||
"3 21.0 4 NaN 30000.0 NaN\n",
|
||
"4 18.0 5 NaN 10000.0 NaN\n",
|
||
"5 29.0 7 NaN 8000.0 NaN"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df1.append(df2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>sex</th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>age</th>\n",
|
||
" <th>salary</th>\n",
|
||
" <th>id</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>A</td>\n",
|
||
" <td>男</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>22</td>\n",
|
||
" <td>12000</td>\n",
|
||
" <td>1</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>B</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>25</td>\n",
|
||
" <td>15000</td>\n",
|
||
" <td>2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>C</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>27</td>\n",
|
||
" <td>20000</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>21</td>\n",
|
||
" <td>30000</td>\n",
|
||
" <td>4</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>E</td>\n",
|
||
" <td>男</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>10000</td>\n",
|
||
" <td>5</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>F</td>\n",
|
||
" <td>男</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>29</td>\n",
|
||
" <td>8000</td>\n",
|
||
" <td>7</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" name sex id age salary id\n",
|
||
"0 A 男 1 22 12000 1\n",
|
||
"1 B 女 2 25 15000 2\n",
|
||
"2 C 女 3 27 20000 3\n",
|
||
"3 D 女 4 21 30000 4\n",
|
||
"4 E 男 5 18 10000 5\n",
|
||
"5 F 男 6 29 8000 7"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"pd.concat([df1,df2],axis = 1)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>sex</th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>age</th>\n",
|
||
" <th>salary</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>A</td>\n",
|
||
" <td>男</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>22</td>\n",
|
||
" <td>12000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>B</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>25</td>\n",
|
||
" <td>15000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>C</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>27</td>\n",
|
||
" <td>20000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>21</td>\n",
|
||
" <td>30000</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>E</td>\n",
|
||
" <td>男</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>18</td>\n",
|
||
" <td>10000</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" name sex id age salary\n",
|
||
"0 A 男 1 22 12000\n",
|
||
"1 B 女 2 25 15000\n",
|
||
"2 C 女 3 27 20000\n",
|
||
"3 D 女 4 21 30000\n",
|
||
"4 E 男 5 18 10000"
|
||
]
|
||
},
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df1.merge(df2)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {
|
||
"collapsed": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>name</th>\n",
|
||
" <th>sex</th>\n",
|
||
" <th>id</th>\n",
|
||
" <th>age</th>\n",
|
||
" <th>salary</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>A</td>\n",
|
||
" <td>男</td>\n",
|
||
" <td>1</td>\n",
|
||
" <td>22.0</td>\n",
|
||
" <td>12000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>B</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>2</td>\n",
|
||
" <td>25.0</td>\n",
|
||
" <td>15000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>C</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>3</td>\n",
|
||
" <td>27.0</td>\n",
|
||
" <td>20000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>D</td>\n",
|
||
" <td>女</td>\n",
|
||
" <td>4</td>\n",
|
||
" <td>21.0</td>\n",
|
||
" <td>30000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>E</td>\n",
|
||
" <td>男</td>\n",
|
||
" <td>5</td>\n",
|
||
" <td>18.0</td>\n",
|
||
" <td>10000.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>F</td>\n",
|
||
" <td>男</td>\n",
|
||
" <td>6</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>6</th>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>NaN</td>\n",
|
||
" <td>7</td>\n",
|
||
" <td>29.0</td>\n",
|
||
" <td>8000.0</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" name sex id age salary\n",
|
||
"0 A 男 1 22.0 12000.0\n",
|
||
"1 B 女 2 25.0 15000.0\n",
|
||
"2 C 女 3 27.0 20000.0\n",
|
||
"3 D 女 4 21.0 30000.0\n",
|
||
"4 E 男 5 18.0 10000.0\n",
|
||
"5 F 男 6 NaN NaN\n",
|
||
"6 NaN NaN 7 29.0 8000.0"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df1.merge(df2,how = 'outer')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Python</th>\n",
|
||
" <th>Math</th>\n",
|
||
" <th>En</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>A</th>\n",
|
||
" <td>40</td>\n",
|
||
" <td>15</td>\n",
|
||
" <td>90</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>B</th>\n",
|
||
" <td>59</td>\n",
|
||
" <td>52</td>\n",
|
||
" <td>83</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>C</th>\n",
|
||
" <td>14</td>\n",
|
||
" <td>138</td>\n",
|
||
" <td>137</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>D</th>\n",
|
||
" <td>89</td>\n",
|
||
" <td>78</td>\n",
|
||
" <td>53</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>E</th>\n",
|
||
" <td>81</td>\n",
|
||
" <td>101</td>\n",
|
||
" <td>3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>F</th>\n",
|
||
" <td>75</td>\n",
|
||
" <td>79</td>\n",
|
||
" <td>86</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Python Math En\n",
|
||
"A 40 15 90\n",
|
||
"B 59 52 83\n",
|
||
"C 14 138 137\n",
|
||
"D 89 78 53\n",
|
||
"E 81 101 3\n",
|
||
"F 75 79 86"
|
||
]
|
||
},
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df = DataFrame(np.random.randint(0,150,size = (6,3)),index = list('ABCDEF'),columns=['Python','Math','En'])\n",
|
||
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {
|
||
"scrolled": false
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"Python 59.7\n",
|
||
"Math 77.2\n",
|
||
"En 75.3\n",
|
||
"dtype: float64"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"s = df.mean().round(1)\n",
|
||
"s"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>score_mean</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>Python</th>\n",
|
||
" <td>59.7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>Math</th>\n",
|
||
" <td>77.2</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>En</th>\n",
|
||
" <td>75.3</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" score_mean\n",
|
||
"Python 59.7\n",
|
||
"Math 77.2\n",
|
||
"En 75.3"
|
||
]
|
||
},
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df2 = DataFrame(s)\n",
|
||
"df2.columns = ['score_mean']\n",
|
||
"df2"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Python</th>\n",
|
||
" <th>Math</th>\n",
|
||
" <th>En</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>score_mean</th>\n",
|
||
" <td>59.7</td>\n",
|
||
" <td>77.2</td>\n",
|
||
" <td>75.3</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Python Math En\n",
|
||
"score_mean 59.7 77.2 75.3"
|
||
]
|
||
},
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df3 = df2.T\n",
|
||
"df3"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Python</th>\n",
|
||
" <th>Math</th>\n",
|
||
" <th>En</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>A</th>\n",
|
||
" <td>40.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>90.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>B</th>\n",
|
||
" <td>59.0</td>\n",
|
||
" <td>52.0</td>\n",
|
||
" <td>83.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>C</th>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>138.0</td>\n",
|
||
" <td>137.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>D</th>\n",
|
||
" <td>89.0</td>\n",
|
||
" <td>78.0</td>\n",
|
||
" <td>53.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>E</th>\n",
|
||
" <td>81.0</td>\n",
|
||
" <td>101.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>F</th>\n",
|
||
" <td>75.0</td>\n",
|
||
" <td>79.0</td>\n",
|
||
" <td>86.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>score_mean</th>\n",
|
||
" <td>59.7</td>\n",
|
||
" <td>77.2</td>\n",
|
||
" <td>75.3</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Python Math En\n",
|
||
"A 40.0 15.0 90.0\n",
|
||
"B 59.0 52.0 83.0\n",
|
||
"C 14.0 138.0 137.0\n",
|
||
"D 89.0 78.0 53.0\n",
|
||
"E 81.0 101.0 3.0\n",
|
||
"F 75.0 79.0 86.0\n",
|
||
"score_mean 59.7 77.2 75.3"
|
||
]
|
||
},
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df4 = df.append(df3)\n",
|
||
"df4"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>score_mean</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>A</th>\n",
|
||
" <td>48.3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>B</th>\n",
|
||
" <td>64.7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>C</th>\n",
|
||
" <td>96.3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>D</th>\n",
|
||
" <td>73.3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>E</th>\n",
|
||
" <td>61.7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>F</th>\n",
|
||
" <td>80.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>score_mean</th>\n",
|
||
" <td>70.7</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" score_mean\n",
|
||
"A 48.3\n",
|
||
"B 64.7\n",
|
||
"C 96.3\n",
|
||
"D 73.3\n",
|
||
"E 61.7\n",
|
||
"F 80.0\n",
|
||
"score_mean 70.7"
|
||
]
|
||
},
|
||
"execution_count": 38,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df5 = DataFrame(df4.mean(axis = 1).round(1))\n",
|
||
"df5.columns = ['score_mean']\n",
|
||
"df5"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 40,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Python</th>\n",
|
||
" <th>Math</th>\n",
|
||
" <th>En</th>\n",
|
||
" <th>score_mean</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>A</th>\n",
|
||
" <td>40.0</td>\n",
|
||
" <td>15.0</td>\n",
|
||
" <td>90.0</td>\n",
|
||
" <td>48.3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>B</th>\n",
|
||
" <td>59.0</td>\n",
|
||
" <td>52.0</td>\n",
|
||
" <td>83.0</td>\n",
|
||
" <td>64.7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>C</th>\n",
|
||
" <td>14.0</td>\n",
|
||
" <td>138.0</td>\n",
|
||
" <td>137.0</td>\n",
|
||
" <td>96.3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>D</th>\n",
|
||
" <td>89.0</td>\n",
|
||
" <td>78.0</td>\n",
|
||
" <td>53.0</td>\n",
|
||
" <td>73.3</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>E</th>\n",
|
||
" <td>81.0</td>\n",
|
||
" <td>101.0</td>\n",
|
||
" <td>3.0</td>\n",
|
||
" <td>61.7</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>F</th>\n",
|
||
" <td>75.0</td>\n",
|
||
" <td>79.0</td>\n",
|
||
" <td>86.0</td>\n",
|
||
" <td>80.0</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>score_mean</th>\n",
|
||
" <td>59.7</td>\n",
|
||
" <td>77.2</td>\n",
|
||
" <td>75.3</td>\n",
|
||
" <td>70.7</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Python Math En score_mean\n",
|
||
"A 40.0 15.0 90.0 48.3\n",
|
||
"B 59.0 52.0 83.0 64.7\n",
|
||
"C 14.0 138.0 137.0 96.3\n",
|
||
"D 89.0 78.0 53.0 73.3\n",
|
||
"E 81.0 101.0 3.0 61.7\n",
|
||
"F 75.0 79.0 86.0 80.0\n",
|
||
"score_mean 59.7 77.2 75.3 70.7"
|
||
]
|
||
},
|
||
"execution_count": 40,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df4.merge(df5,left_index=True,right_index=True)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.6.5"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 2
|
||
}
|