In [ ]:
import warnings
warnings.filterwarnings('ignore')
실습파일 구성
- PII.csv
- pandas Package
In [2]:
import pandas as pd
- .read_csv( )
In [3]:
# Location of the practice dataset (PII.csv) on raw.githubusercontent.com.
url = 'https://raw.githubusercontent.com/rusita-ai/pyData/master/PII.csv'

# Fetch the CSV into a DataFrame, then preview the first five rows.
DF = pd.read_csv(filepath_or_buffer = url)
DF.head(n = 5)
Out[3]:
 | Name | Gender | Age | Grade | Picture | BloodType | Height | Weight |
---|---|---|---|---|---|---|---|---|
0 | 송태섭 | 남자 | 21 | 3 | 무 | B | 179.1 | 63.9 |
1 | 최유정 | 여자 | 23 | 1 | 유 | A | 177.1 | 54.9 |
2 | 이한나 | 여자 | 20 | 1 | 무 | A | 167.9 | 50.2 |
3 | 김소혜 | 여자 | 23 | 3 | 무 | O | 176.1 | 53.5 |
4 | 서태웅 | 남자 | 24 | 4 | 무 | B | 176.1 | 79.8 |
seaborn Package
In [4]:
import seaborn as sns
import matplotlib.pyplot as plt
In [5]:
# Line graph of Height against the DataFrame row index:
# thin red dashed line with a '>' marker on every observation.
line_kwargs = dict(linewidth = 1, color = 'r', marker = '>', linestyle = '--')

plt.figure(figsize = (10, 7))
sns.lineplot(x = DF.index, y = DF['Height'], **line_kwargs)

# Title, axis labels and background grid.
plt.title('Line Graph', fontsize = 30)
plt.xlabel('Index', fontsize = 20)
plt.ylabel('Height', fontsize = 20)
plt.grid(True)

plt.show()
II. 막대 그래프
1) 연속형 - .barplot( )
In [6]:
# Bar plot of a continuous variable: one bar per row index, bar height = Height.
plt.figure(figsize = (10, 7))
sns.barplot(x = DF.index, y = 'Height', data = DF)
plt.show()
2) 명목형 - .countplot( )
In [7]:
# Count plot of a nominal variable: number of rows per BloodType value.
plt.figure(figsize = (10, 7))
sns.countplot(x = 'BloodType', data = DF)
plt.show()
In [8]:
# Same BloodType counts, with each category split into one bar per Gender value.
count_kwargs = dict(x = 'BloodType', hue = 'Gender')

plt.figure(figsize = (10, 7))
sns.countplot(data = DF, **count_kwargs)
plt.show()
In [9]:
# Horizontal variant: BloodType is mapped to the y-axis, so the bars run sideways.
# The figure is portrait-shaped (7 x 10) to match.
plt.figure(figsize = (7, 10))
sns.countplot(y = 'BloodType', hue = 'Gender', data = DF)
plt.show()
In [10]:
# Histogram of Height using 5 bins; bars are drawn mostly transparent (alpha 0.3).
hist_kwargs = dict(bins = 5, alpha = 0.3)

plt.figure(figsize = (10, 7))
sns.histplot(data = DF, x = 'Height', **hist_kwargs)
plt.show()
In [11]:
# Box plot of Height per BloodType, with the boxes placed in a fixed
# A, B, O, AB order.
blood_type_order = ['A', 'B', 'O', 'AB']

plt.figure(figsize = (10, 7))
sns.boxplot(data = DF, x = 'BloodType', y = 'Height', order = blood_type_order)
plt.show()
In [21]:
# Scatter plot of Weight against Height, drawn with 'x' markers of size 50.
marker_kwargs = dict(marker = 'x', s = 50)

plt.figure(figsize = (10, 7))
sns.scatterplot(data = DF, x = 'Height', y = 'Weight', **marker_kwargs)
plt.show()
In [22]:
# Violin plot of the Age distribution for each Gender value.
plt.figure(figsize = (10, 7))
sns.violinplot(x = 'Gender', y = 'Age', data = DF)
plt.show()
VII. Histograms
In [28]:
# Two side-by-side views of the Height distribution.
fig, ax = plt.subplots(nrows = 1, ncols = 2, figsize = (15, 5))

# Left panel: 5-bin histogram with a KDE curve overlaid.
sns.histplot(data = DF, x = 'Height',
             bins = 5, alpha = 0.2,
             kde = True, ax = ax[0])

# Right panel: KDE curve plus a rug of the individual observations.
# sns.distplot() is deprecated and scheduled for removal from seaborn. With
# hist = False it only drew a KDE and a rug, so the axes-level kdeplot() and
# rugplot() functions are used instead. The former `bins` argument had no
# effect without the histogram and is dropped.
sns.kdeplot(data = DF, x = 'Height', color = 'royalblue', ax = ax[1])
sns.rugplot(data = DF, x = 'Height', color = 'royalblue', ax = ax[1])

# Per-panel titles and axis labels (these override the seaborn defaults).
ax[0].set_title('Histogram-1', size = 20)
ax[1].set_title('Histogram-2', size = 20)
ax[0].set_xlabel('Height', size = 15)
ax[1].set_xlabel('Height', size = 15)
ax[0].set_ylabel('Frequency', size = 15)
ax[1].set_ylabel('Density', size = 15)

plt.show()
2) Multiple Plots
In [29]:
# 2 x 2 grid: bar plot, histogram, box plot and scatter plot on one figure.
fig, ax = plt.subplots(nrows = 2, ncols = 2, figsize = (15, 10))

# Top-left: bar plot of Age per Grade, split by Gender, without error bars.
# NOTE(review): `ci` is deprecated in newer seaborn releases in favour of
# `errorbar = None`; kept as-is here - confirm the target seaborn version.
sns.barplot(x = 'Grade', y = 'Age', hue = 'Gender',
            ci = None, data = DF, ax = ax[0, 0])

# Top-right: 6-bin histogram of Weight.
sns.histplot(x = 'Weight', bins = 6, alpha = 0.3,
             data = DF, ax = ax[0, 1])

# Bottom-left: Height per BloodType in a fixed A, B, O, AB order.
sns.boxplot(x = 'BloodType', y = 'Height',
            order = ['A', 'B', 'O', 'AB'],
            data = DF, ax = ax[1, 0])

# Bottom-right: Weight vs. Height, colour = Grade, marker shape = BloodType.
sns.scatterplot(x = 'Height', y = 'Weight',
                hue = 'Grade', style = 'BloodType', s = 50,
                data = DF, ax = ax[1, 1])

# Legend for the bar plot. Accepted `loc` values:
# 'best', 'upper right', 'upper left', 'lower left', 'lower right'
# 'right', 'center left', 'center right', 'lower center', 'upper center', 'center'
# The entries are relabelled purely by position - presumably the hue order is
# male first, then female; verify against the data.
ax[0, 0].legend(title = 'Gender', loc = 'upper left', labels = ['Male','Female'])

# (title, x-label, y-label) for each panel, keyed by its grid position.
panel_text = {
    (0, 0): ('Bar Plot', 'Grade', 'Age Mean'),
    (0, 1): ('Histogram', 'Weight', 'Frequency'),
    (1, 0): ('Box Plot', 'Blood Type', 'Height'),
    (1, 1): ('Scatter Plot', 'Height', 'Weight'),
}
for panel_pos, (panel_title, panel_xlabel, panel_ylabel) in panel_text.items():
    ax[panel_pos].set_title(panel_title)
    ax[panel_pos].set_xlabel(panel_xlabel)
    ax[panel_pos].set_ylabel(panel_ylabel)

plt.show()
'# Coding > 데이터 분석을 위한 Python' 카테고리의 다른 글
Python 통계 가설 검정 (0) | 2023.10.02 |
---|---|
Python 기술 통계 (0) | 2023.10.02 |
Python 데이터 전처리 (0) | 2023.10.02 |
Python 판다스 (0) | 2023.10.02 |
Python 넘파이 (0) | 2023.10.02 |