Visualization
Visualization - [plotly library]
_data
2022. 12. 18. 11:10
matplotlib보다 더욱 직관적이고, 분석하는 데에 용이하다. 마우스 커서로 값들을 볼 수 있는 장점이 있다.
코드는 복잡하니 외우지 말고 스크랩해서 수정하면 된다.
plotly library
# Third-party imports used by the plotting sections below.
import pandas as pd
import numpy as np
import matplotlib as mpl
import plotly as py
import plotly.graph_objs as go
# The original line read "import import", which is a SyntaxError.
from plotly.offline import init_notebook_mode, iplot, plot
from wordcloud import WordCloud

# Enable plotly's offline notebook rendering before any iplot() call.
init_notebook_mode(connected=True)
Line Plot
# Line plot: citations and teaching against world rank for the first
# 100 rows of timesData.
df = timesData.iloc[:100, :]

# Both traces share the same x values and hover text.
rank = df.world_rank
hover_names = df.university_name

# Citations, drawn as a plain line.
trace1 = go.Scatter(
    name="citations",
    mode="lines",
    x=rank,
    y=df.citations,
    text=hover_names,
    marker={"color": 'rgba(16, 112, 2, 0.8)'},
)

# Teaching, drawn as a line with point markers.
trace2 = go.Scatter(
    name="teaching",
    mode="lines+markers",
    x=rank,
    y=df.teaching,
    text=hover_names,
    marker={"color": 'rgba(80, 26, 80, 0.8)'},
)

data = [trace1, trace2]
layout = {
    "title": 'Citation and Teaching vs World Rank of Top 100 Universities',
    "xaxis": {"title": 'World Rank', "ticklen": 5, "zeroline": False},
}

# Figure given as a plain dict of data + layout, then rendered inline.
fig = {"data": data, "layout": layout}
iplot(fig)
Scatter Plot
# Scatter plot: citations against world rank, one marker trace per year,
# using the first 100 rows of each year's subset of timesData.
df2014, df2015, df2016 = (
    timesData[timesData.year == year].iloc[:100, :]
    for year in (2014, 2015, 2016)
)

# One (frame, legend label, marker colour) triple per trace.
trace_specs = (
    (df2014, "2014", 'rgba(255, 128, 255, 0.8)'),
    (df2015, "2015", 'rgba(255, 128, 2, 0.8)'),
    (df2016, "2016", 'rgba(0, 255, 200, 0.8)'),
)

# Hover text on every trace is the university name.
trace1, trace2, trace3 = (
    go.Scatter(
        x=frame.world_rank,
        y=frame.citations,
        mode="markers",
        name=label,
        marker={"color": colour},
        text=frame.university_name,
    )
    for frame, label, colour in trace_specs
)

data = [trace1, trace2, trace3]
layout = {
    "title": 'Citation vs world rank of top 100 universities with 2014, 2015 and 2016 years',
    "xaxis": {"title": 'World Rank', "ticklen": 5, "zeroline": False},
    "yaxis": {"title": 'Citation', "ticklen": 5, "zeroline": False},
}

fig = {"data": data, "layout": layout}
iplot(fig)
Bar Chart
# Grouped bar chart: citations and teaching for the first three rows of
# the 2014 subset of timesData.
df2014 = timesData[timesData.year == 2014].iloc[:3, :]

# Citations bars; the country is attached as the bar text.
trace1 = go.Bar(
    name="citations",
    x=df2014.university_name,
    y=df2014.citations,
    text=df2014.country,
    marker={
        "color": 'rgba(255, 174, 255, 0.5)',
        "line": {"color": 'rgb(0,0,0)', "width": 1.5},
    },
)

# Teaching bars, same categories and text as above.
trace2 = go.Bar(
    name="teaching",
    x=df2014.university_name,
    y=df2014.teaching,
    text=df2014.country,
    marker={
        "color": 'rgba(255, 255, 128, 0.5)',
        "line": {"color": 'rgb(0,0,0)', "width": 1.5},
    },
)

data = [trace1, trace2]

# barmode='group' places the two bars of each category side by side.
layout = go.Layout(barmode='group')
fig = go.Figure(data=data, layout=layout)
iplot(fig)
Bar Chart 2
# Second bar chart: the same citations/teaching data, with traces given
# as plain dicts and barmode 'relative'.
# Reads df2014, which is not assigned in this section; it must already
# be defined when this code runs.
x = df2014.university_name

trace1 = {
    'x': x,
    'y': df2014.citations,
    'name': 'citation',
    'type': 'bar',
}
trace2 = {
    'x': x,
    'y': df2014.teaching,
    'name': 'teaching',
    'type': 'bar',
}
data = [trace1, trace2]

layout = {
    'xaxis': {'title': 'Top 3 대학'},
    'barmode': 'relative',
    # Title typo fixed: "teacing" -> "teaching".
    'title': 'citations and teaching of top 3 universities in 2014',
}

fig = go.Figure(data=data, layout=layout)
iplot(fig)
Pie Chart
# Pie (donut) chart of num_students for the first seven rows of the 2016
# subset of timesData.
df2016 = timesData[timesData.year == 2016].iloc[:7, :]

# num_students is text with a thousands separator: the value 2243 is
# written "2,243". Strip the comma before converting. Replacing it with
# "." would read "2,243" as 2.243, and would raise on values with more
# than one comma.
pie1 = df2016.num_students
pie1_list = [float(each.replace(',', '')) for each in pie1]
labels = df2016.university_name

# Figure given as a plain dict: one pie trace plus a layout.
fig = {
    'data': [
        {
            'values': pie1_list,
            'labels': labels,
            'domain': {'x': [0, .5]},
            'name': 'num of students rates',
            'hoverinfo': 'label+percent+name',
            'hole': .3,  # non-zero hole renders the pie as a donut
            'type': 'pie',
        },
    ],
    'layout': {
        'title': '대학 학생 비율',
        'annotations': [
            {
                'font': {'size': 20},
                'showarrow': False,
                'text': ' Number of students',
                'x': 0.20,
                'y': 1,
            },
        ],
    },
}
iplot(fig)
Histogram
# Overlaid histograms of student_staff_ratio for the 2011 and 2012 rows
# of timesData.
staff_ratio = timesData.student_staff_ratio
x2011 = staff_ratio[timesData.year == 2011]
x2012 = staff_ratio[timesData.year == 2012]

# Histogram of the 2011 values.
trace1 = go.Histogram(
    name='2011',
    x=x2011,
    opacity=.75,
    marker={"color": 'rgba(171,50,96, .6)'},
)

# Histogram of the 2012 values.
trace2 = go.Histogram(
    name='2012',
    x=x2012,
    opacity=.75,
    marker={"color": 'rgba(12,50,196, .6)'},
)

data = [trace1, trace2]

# barmode='overlay' draws both histograms over the same x range; the
# traces' opacity keeps both visible.
layout = go.Layout(
    barmode='overlay',
    title='',
    xaxis={"title": 'students-staff - ratio'},
    yaxis={"title": 'Count'},
)
fig = go.Figure(data=data, layout=layout)
iplot(fig)
Word Cloud
import matplotlib.pyplot as plt

# Word cloud built from the country column of the 2011 rows of timesData.
plt.subplots(figsize=(8, 8))

# The country series gets its own name. The original stored it in x2012
# but joined x2011, which the histogram section assigns from the
# student_staff_ratio column; that neither used the country data nor
# left the histogram's x2012 intact.
countries2011 = timesData.country[timesData.year == 2011]

# generate() takes a single text string, so join the names with spaces.
wordcloud = WordCloud(
    background_color='white',
    width=512,
    height=384,
).generate(" ".join(countries2011))

plt.imshow(wordcloud)
plt.axis('off')
# Save before show() so the written image contains the rendered figure.
plt.savefig('graph.png')
plt.show()
reference : https://www.kaggle.com/kanncaa1/plotly-tutorial-for-beginners/