数据可视化、matplotlib使用小结

初始化

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
# Create a figure with one Axes. The two calls below are shown without
# arguments as an API listing; supply real arguments before running them.
fig, ax = plt.subplots()
ax.set_title() # set the chart title
plt.savefig() # save the figure to a local file as an image

设置坐标轴属性

设置坐标位置和标签

# API listing: calls shown without arguments need real arguments before running.
ax.set_xlabel() # set the axis label (axis name)
ax.set_xticks() # control the tick positions
ax.set_xlim() # set the axis range
ax.set_xticklabels() # control the tick label text
# NOTE: the name `format` shadows Python's builtin format() within this snippet.
format='%d ms'
ticks = mtick.FormatStrFormatter(format)
ax.xaxis.set_major_formatter(ticks) # format the tick label text
ax.xaxis.set_tick_params() # set tick parameters
ax.yaxis.tick_right() # move the y-axis ticks to the right side
ax.minorticks_on() # show minor ticks
ax.set_xscale('log') # use a logarithmic scale

实例

import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

# Demo: a sine curve that exercises the axis-configuration calls.
x = np.arange(0, 5, 0.1)
y = np.sin(x)
x_ticks = np.arange(6)
x_ticklabels = ['A', 'B', 'C', 'D', 'E', 'F']
fig, ax = plt.subplots(figsize=(14, 7))

ax.plot(x, y)

# Choose the scale BEFORE installing a custom formatter: changing the scale
# resets the axis locators and formatters to the new scale's defaults, so a
# formatter set earlier would be discarded.
ax.set_yscale('log')

# Named `tick_format` so the builtin format() is not shadowed. The plotted
# values are sin(x), so that is what the label says.
tick_format = 'sin(x) = %.2f'
ax.yaxis.set_major_formatter(mtick.FormatStrFormatter(tick_format))
ax.yaxis.tick_right()

# Fix the tick positions first, then give them custom text and styling.
ax.set_xticks(x_ticks)
ax.set_xticklabels(x_ticklabels)
ax.xaxis.set_tick_params(rotation=45, labelsize=18, colors='r')
ax.set_xlim(0, 10)
ax.minorticks_on()

plt.show()

image-20201127153757509

绘制子图

# Build a 2x2 grid of subplots; `ax` is indexed by (row, column).
fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(14, 7))
ax[0, 0]  # top-left subplot
ax[1, 1]  # bottom-right subplot

折线图

平滑化

# Smooth a curve by interpolating it on a denser grid.
# `x` and `y` are the original sample arrays, defined outside this snippet.
from scipy import interpolate # use scipy to interpolate a smooth curve
xnew = np.linspace(x.min(),x.max(),300) # 300 evenly spaced points between x.min() and x.max()
func = interpolate.interp1d(x, y, kind = 'cubic') # build the cubic interpolation function
ynew = func(xnew) # evaluate the y values at the new x positions

实例

import matplotlib.pyplot as plt
import numpy as np
from scipy import interpolate

# Two side-by-side panels, each showing a cubic-smoothed hourly tourist curve.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 7))

# Hours 8..21, shared by both data series.
x = np.array(list(range(8, 22)))
# Evaluate the smoothed curves on 300 evenly spaced points between
# x.min() and x.max().
xnew = np.linspace(x.min(), x.max(), 300)
y_ticks = [2000, 4000, 6000, 8000, 10000, 12000]

# One entry per panel: (tourist counts, x-axis limits, x tick positions).
panels = [
    (
        [500, 1700, 3900, 10500, 9200, 7200, 8500, 9700, 6000, 2000, 800, 400, 200, 100],
        (9, 18),
        list(range(9, 19)),
    ),
    (
        [500, 1000, 3500, 8000, 7000, 5800, 6200, 7800, 4200, 4300, 4700, 3700, 2300, 1000],
        (8, 22),
        list(range(8, 23, 2)),
    ),
]

for panel_ax, (counts, x_limits, x_ticks) in zip(ax, panels):
    smooth = interpolate.interp1d(x, np.array(counts), kind='cubic')
    panel_ax.set_title('Relation between time and Number of tourists', fontsize=10)
    panel_ax.set_ylabel('Number of tourists')
    panel_ax.set_xlabel('time(hour)')
    panel_ax.set_xlim(x_limits)
    panel_ax.set_ylim((0, 12000))
    panel_ax.set_xticks(x_ticks)
    panel_ax.set_yticks(y_ticks)
    panel_ax.plot(xnew, smooth(xnew))

plt.show()

image-20201127154644891

线条和图例

# Line styling options; `x`, `y` and `ax` come from the surrounding context.
ax.plot(x, y, "ro-", label = "A") # red solid line with circle markers
ax.plot(x, y, "bx--", label = "B") # blue dashed line with x markers
ax.plot(x, y, marker = 'D', alpha = 0.8, color = 'pink', linestyle = '-.', linewidth = 3, label = 'C') # style given as keyword arguments instead of a format string
# Common parameters:
# marker: data point marker style
# alpha: opacity
# color: line color
# linestyle: line style
# linewidth: line width
# label: series name (used to build the legend)
# More parameters in the reference: https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html
ax.legend()

实例

import matplotlib.pyplot as plt
import numpy as np

# Three differently styled curves on one Axes, identified by a legend.
x = np.arange(0, 5, 0.1)
fig, ax = plt.subplots(figsize=(14, 7))

# Format-string shorthand: colour, marker and line style in one string.
ax.plot(x, np.sin(x), "ro-", label='A')
ax.plot(x, np.cos(x), "bx--", label='B')

# The third curve is styled through explicit keyword arguments.
curve_c_style = dict(marker='D', alpha=0.8, color='pink', linestyle='-.', linewidth=3)
ax.plot(x, x * x / 5, label='C', **curve_c_style)

ax.legend()

plt.show()

image-20201127160331696

实例1

import matplotlib.pyplot as plt
import numpy as np

# Compare two students' Chinese scores across 15 exams.
# One label per exam: the label list must have the same length as the tick
# positions, otherwise set_xticklabels rejects it.
x_labels = ['CHI611', 'CHI612', 'CHI621', 'CHI622', 'CHI711', 'CHI712', 'CHI721', 'CHI722',
            'CHI811', 'CHI812', 'CHI821', 'CHI822', 'CHI911', 'CHI912', 'CHI921']
y_A13 = [82.0, 80.0, 87.0, 83.0, 79.0, 80.0, 82.0, 75.0, 85.0, 86.0, 79.0, 79.16666666666666, 80.66666666666666, 81.33333333333333, 80.0]
y_A15 = [80.0, 80.0, 84.0, 80.0, 81.0, 80.0, 81.0, 77.0, 84.0, 87.0, 80.0, 80.83333333333333, 78.0, 84.0, 81.33333333333333]
x = np.arange(len(y_A13))

fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(x, y_A13, "ro-", label="stu_A13")
ax.plot(x, y_A15, "bx-", label="stu_A15")
ax.set_xticks(x)
ax.set_xticklabels(x_labels)
ax.set_ylim((40, 100))
ax.set_ylabel('scores')
ax.set_title("A13 & A15's Chinese scores in different exams")
ax.legend()
plt.show()

image-20201130195809660

饼图

# Pie chart call; `data`, `labels` and `explode` are defined outside this snippet.
ax.pie(data, labels = labels, explode = explode, counterclock = 0, autopct = '%1.0f%%', textprops = {'fontsize' : 12, 'color' : 'w'})
# Common parameters:
# labels: text shown outside each wedge
# explode: offset of each wedge from the centre
# counterclock: clockwise / counter-clockwise direction
# autopct: format of the numeric value
# textprops: style of the value text
# colors: wedge colours

图例

# Place the legend at the top centre, laid out in 7 columns.
ax.legend(loc='upper center', ncol = 7)
# Common parameters:
# ncol: number of legend columns

实例

import matplotlib.pyplot as plt
import numpy as np

# Pie chart of language shares with a one-row legend along the top.
shares = {
    'French': 25,
    'English': 30,
    'Spanish': 10,
    'Chinese': 13,
    'German': 9,
    'Italian': 8,
    'Other': 5,
}
labels = list(shares)
data = list(shares.values())
# Every wedge is pulled out slightly; 'English' is pulled out further.
explode = [0.05 if name == 'English' else 0.02 for name in labels]

fig, ax = plt.subplots(figsize=(14, 7))

value_text_style = {'fontsize': 12, 'color': 'w'}
ax.pie(
    data,
    labels=labels,
    explode=explode,
    counterclock=False,
    autopct='%1.0f%%',
    textprops=value_text_style,
    colors=list('bgrcmyk'),
)
ax.legend(loc='upper center', ncol=7)

plt.show()

image-20201130200609977

条形图

# Draw bars of thickness `width` at positions x - width / 2, i.e. shifted left
# by half a bar; `x`, `y1` and `width` are defined outside this snippet.
ax.bar(x - width / 2, y1, width, color = '#FFCCCC')

实例

import matplotlib.pyplot as plt
import numpy as np

# Grouped bar chart: female and male head counts for each class.
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G']
F_count = [14, 18, 18, 14, 11, 7, 9]
M_count = [18, 16, 17, 14, 13, 9, 11]

width = 0.25
x = np.arange(len(labels))
y1 = np.asarray(F_count, int)
y2 = np.asarray(M_count, int)

fig, ax = plt.subplots(figsize=(14, 7))
# Attach the legend text to each bar group via `label=` so the mapping between
# artist and label is explicit rather than relying on the order of the calls.
ax.bar(x - width / 2, y1, width, color='#FFCCCC', label='Female')
ax.bar(x + width / 2, y2, width, color='#6699CC', label='Male')
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_xlabel('Class')
ax.set_ylim((0, 20))
ax.set_yticks(np.arange(0, 21, 4))
ax.set_ylabel('number')
ax.set_title('The number of Female and Male in different classes')
ax.legend()

plt.show()

image-20210604145431037

其他比较有趣的图

# Generate the genre influence matrix and visualize it (Figure 13).
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

filepath = 'datasets/weighed_influence_data.csv'

# read_csv opens and closes the file itself; no separate open() is needed.
df = pd.read_csv(filepath, encoding='utf-8')

# Keep only the rows that actually carry a weight (drop NaN in column 'w').
df = df.dropna(subset=['w'])
print(df.describe())
genre_list = ['Pop/Rock', 'R&B;', 'Jazz', 'Country', 'Latin', 'Electronic', 'Vocal', 'Reggae', 'Blues', 'Folk', 'Religious', 'International',
              'Stage & Screen', 'Comedy/Spoken', 'New Age', 'Classical', 'Easy Listening', 'Avant-Garde']

# Hard-coded baseline year per genre (same order as genre_list); it is
# subtracted from the follower's start year in the weight denominator below.
early = [1930, 1930, 1930, 1930, 1930, 1960, 1930, 1940, 1930, 1930, 1930, 1930, 1930, 1930,
         1960, 1930, 1930, 1940]

# Genre name -> row/column index in the matrix.
genre_dict = {genre: idx for idx, genre in enumerate(genre_list)}

print(genre_dict)

N = len(genre_list)
M = np.zeros([N, N])  # M[i, j]: accumulated weight of genre i influencing genre j
T = 5  # constant offset added to the denominator

# Largest cell value seen while accumulating; used to normalise the matrix.
mx = 0
for _, row in df.iterrows():
    inf_genre = row['influencer_main_genre']
    fol_genre = row['follower_main_genre']
    if inf_genre not in genre_dict or fol_genre not in genre_dict:
        continue
    inf_num = genre_dict[inf_genre]
    fol_num = genre_dict[fol_genre]
    # Influence within the same genre is not counted.
    if inf_num == fol_num:
        continue
    # NOTE(review): the denominator is zero when follower_active_start equals
    # early[fol_num] - T; confirm the data set never contains such a row.
    M[inf_num, fol_num] += row['w'] * 10 / (T + row['follower_active_start'] - early[fol_num])
    mx = max(mx, M[inf_num, fol_num])

# Normalise by the maximum. Skip this when nothing was accumulated, otherwise
# 0 / 0 would fill the matrix with NaN.
if mx > 0:
    M /= mx

vec = M.ravel().tolist()
print(vec)
vec.sort()
print(vec)

# Replace every value by its 1-based position of first occurrence in the
# sorted list of all cells, so the colour map shows ranks, not raw magnitudes.
M = (np.searchsorted(vec, M, side='left') + 1).astype(float)

fig, ax = plt.subplots(figsize=(10, 8))

ax.matshow(M, cmap=plt.get_cmap('Purples'), alpha=0.8)

# Fix the tick positions first, then attach one label to each position.
ax.set_xticks(range(N))
ax.set_yticks(range(N))
ax.set_xticklabels(genre_list, rotation=90)
ax.set_yticklabels(genre_list)
plt.savefig(f'plt12/purple_T={T}_value.png')
plt.show()

image-20210604144011164

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter

# Lower quartile, upper quartile and median of the similarity for each noise level.
low25 = np.asarray([0.715499, 0.700573, 0.680814, 0.663754, 0.647511, 0.634152, 0.620071,
                    0.606473, 0.594698, 0.581464, 0.569964])
up25 = np.asarray([0.922998, 0.919967, 0.916710, 0.913627, 0.910433, 0.907065, 0.903601,
                   0.900078, 0.896786, 0.893284, 0.889319])

med = [0.837542, 0.830796, 0.822943, 0.814988, 0.807225, 0.799796, 0.792978,
       0.785107, 0.777864, 0.769231, 0.759488]

# Newer matplotlib releases ship the seaborn style sheets under a
# 'seaborn-v0_8' prefix and no longer accept the bare 'seaborn' name;
# fall back to the old name on versions that only know that one.
style_name = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(style_name)
fig, ax = plt.subplots(figsize=(14, 7))

x = np.arange(len(low25))
xlabel = x * 2

# Floating bars spanning the lower to the upper quartile.
ax.bar(x, height=up25 - low25, width=0.45, bottom=low25)
ax.plot(x, med, marker='D', alpha=0.8, color='pink', linestyle='--', linewidth=3, label='median')

ax.plot(x, low25, marker='X', alpha=0.8, color='black', linestyle='--', linewidth=3, label='25% position')
ax.plot(x, up25, marker='o', alpha=0.8, color='black', linestyle='--', linewidth=3, label='75% position')

ax.set_xticks(x)
ax.set_xticklabels(xlabel)
def to_percent(temp, position):
    """Return twice the tick value as a whole-number percentage string.

    ``temp`` is the tick value. ``position`` is accepted because the function
    is used as a ``FuncFormatter`` callback, but it is not used.
    """
    doubled = 2 * temp
    return f'{doubled:.0f}%'
# Axis cosmetics, percentage tick labels and legend; then save and show.
ax.set(
    ylim=(0.5, 1),
    xlabel='Percentage of noise',
    title='Overall similarity under different noise levels',
)
# `ax` is the only Axes of the current figure, so it is used directly.
ax.xaxis.set_major_formatter(FuncFormatter(to_percent))
ax.legend()
plt.savefig('plt13/plt13.png')
plt.show()

image-20210604144124891

基于networkx:

# Generate the network figure shown in Figure 1.
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np

G = nx.DiGraph()
edges = []
tot = 1000  # read at most this many lines from the edge file
sz = {}     # node id -> number of times it appears as an edge endpoint
filepath = 'datasets/formatted_data2.txt'
with open(filepath, "r", encoding='utf-8') as f:
    for index, line in enumerate(f):
        if index >= tot:
            break
        fields = line.split()
        # Skip blank or malformed lines instead of failing on a missing column.
        if len(fields) < 2:
            continue
        first, second = fields[0], fields[1]
        # Each line holds two node ids; the edge points from the second to the first.
        edges.append((second, first))
        sz[first] = sz.get(first, 0) + 1
        sz[second] = sz.get(second, 0) + 1
G.add_edges_from(edges)
print(G.number_of_nodes())

# One colour value per edge actually present in the graph: duplicate lines
# collapse into a single edge, so the count can be smaller than `tot`.
colors = list(range(G.number_of_edges()))
# Node sizes must follow the order of G.nodes(), not the numeric node id.
node_sizes = [np.sqrt(sz[node]) * 7 for node in G.nodes()]
pos = nx.spring_layout(G)

# Full-figure axes without a frame. Nodes and edges are drawn separately so
# that the transparency applies to the edges only.
fig = plt.figure()
ax = fig.add_axes((0, 0, 1, 1))
ax.set_axis_off()
nx.draw_networkx_nodes(
    G,
    pos,
    ax=ax,
    node_color='#0066ff',
    node_size=node_sizes,
)
nx.draw_networkx_edges(
    G,
    pos,
    ax=ax,
    node_size=node_sizes,
    edge_color=colors,
    edge_cmap=plt.get_cmap('Blues'),
    alpha=0.7,
    width=0.2,
    arrows=True,
)
plt.savefig('plt15/fig.png')
plt.show()

image-20210604144250702