{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# 分组聚合透视\n", "# 很多时候属性是相似的\n", "\n", "import numpy as np\n", "\n", "import pandas as pd\n", "\n", "from pandas import Series,DataFrame" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HandSmokesexweightIQ
0rightyesmale80100
1leftyesfemale50120
2leftnofemale4890
3rightnomale75130
4rightyesmale68140
5rightnomale10080
6rightnofemale4094
7rightnofemale90110
8leftnomale88100
9rightyesfemale76160
\n", "
" ], "text/plain": [ " Hand Smoke sex weight IQ\n", "0 right yes male 80 100\n", "1 left yes female 50 120\n", "2 left no female 48 90\n", "3 right no male 75 130\n", "4 right yes male 68 140\n", "5 right no male 100 80\n", "6 right no female 40 94\n", "7 right no female 90 110\n", "8 left no male 88 100\n", "9 right yes female 76 160" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 走右手习惯,是否抽烟,性别,对体重,智商,有一定影响\n", "\n", "df = DataFrame({'Hand':['right','left','left','right','right','right','right','right','left','right'],\n", " 'Smoke':['yes','yes','no','no','yes','no','no','no','no','yes'],\n", " 'sex':['male','female','female','male','male','male','female','female','male','female'],\n", " 'weight':[80,50,48,75,68,100,40,90,88,76],\n", " 'IQ':[100,120,90,130,140,80,94,110,100,160]})\n", "df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# 分组聚合查看规律,某一条件下规律" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
weightIQ
Hand
left62.0103.3
right75.6116.3
\n", "
" ], "text/plain": [ " weight IQ\n", "Hand \n", "left 62.0 103.3\n", "right 75.6 116.3" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = df.groupby(by = ['Hand'])[['weight','IQ']].mean().round(1)\n", "data" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
weight
Hand
left62.0
right75.6
\n", "
" ], "text/plain": [ " weight\n", "Hand \n", "left 62.0\n", "right 75.6" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby(by = ['Hand'])[['weight']].apply(np.mean).round(1)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "df2 = df.groupby(by = ['Hand'])[['weight']].transform(np.mean).round(1)\n" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
weight_mean
075.6
162.0
262.0
375.6
475.6
575.6
675.6
775.6
862.0
975.6
\n", "
" ], "text/plain": [ " weight_mean\n", "0 75.6\n", "1 62.0\n", "2 62.0\n", "3 75.6\n", "4 75.6\n", "5 75.6\n", "6 75.6\n", "7 75.6\n", "8 62.0\n", "9 75.6" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = df2.add_suffix('_mean')\n", "df2" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
HandSmokesexweightIQweight_mean
0rightyesmale8010075.6
1leftyesfemale5012062.0
2leftnofemale489062.0
3rightnomale7513075.6
4rightyesmale6814075.6
5rightnomale1008075.6
6rightnofemale409475.6
7rightnofemale9011075.6
8leftnomale8810062.0
9rightyesfemale7616075.6
\n", "
" ], "text/plain": [ " Hand Smoke sex weight IQ weight_mean\n", "0 right yes male 80 100 75.6\n", "1 left yes female 50 120 62.0\n", "2 left no female 48 90 62.0\n", "3 right no male 75 130 75.6\n", "4 right yes male 68 140 75.6\n", "5 right no male 100 80 75.6\n", "6 right no female 40 94 75.6\n", "7 right no female 90 110 75.6\n", "8 left no male 88 100 62.0\n", "9 right yes female 76 160 75.6" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3 = df.merge(df2,left_index=True,right_index=True)\n", "df3" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "Hand\n", "left ([3, 3], [62.0, 103.3])\n", "right ([7, 7], [75.6, 116.3])\n", "dtype: object" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def count(x):\n", " \n", " return (x.count(),x.mean().round(1))\n", "\n", "df.groupby(by = ['Hand'])[['weight','IQ']].apply(count)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IQ
Handsex
leftfemale120
male100
rightfemale160
male140
\n", "
" ], "text/plain": [ " IQ\n", "Hand sex \n", "left female 120\n", " male 100\n", "right female 160\n", " male 140" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.groupby(by = ['Hand','sex'])[['IQ']].max()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = df.groupby(by = ['Hand'])['IQ','weight']\n", "data" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IQweight
maxmeanmaxmean
Hand
left120103.38862.0
right160116.310075.6
\n", "
" ], "text/plain": [ " IQ weight \n", " max mean max mean\n", "Hand \n", "left 120 103.3 88 62.0\n", "right 160 116.3 100 75.6" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.agg(['max','mean']).round(1)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IQweight
Hand
left12062.0
right16075.6
\n", "
" ], "text/plain": [ " IQ weight\n", "Hand \n", "left 120 62.0\n", "right 160 75.6" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.agg({'IQ':'max','weight':'mean'}).round(1)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }