首页 > 解决方案 > 在 Python 中沿数轴(刻度线和脊椎)绘制分类变量

问题描述

我正在尝试制作一种“数字线”图(有点像扁平直方图),其中每个变量的全部范围都显示为一条线,该线根据数据集中实际表示的值进行着色。我在数值变量方面相当成功,但对于分类变量,事情变得越来越棘手。

如何使分类标签在两个刻度之间居中并使彩色线停止并正确结束,以便它们也以标签为中心?

以下是一些要使用的示例数据

#Imports
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.colors as mcolors
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from datetime import date, datetime, timedelta
import datetime
from google.colab import files
import scipy
%matplotlib inline
pd.options.display.max_columns = 100

# Import sample data
gID = '0'
docID = '10f0BkWqf4rI9MFkR6NduntKj1oZ6aXXzezCXb06v1CU'
sample_data = pd.read_csv('https://docs.google.com/spreadsheets/d/' + docID + '/export?gid=' + gID + '&format=csv')
sample_data

这是我为数值制作的函数,效果很好:

def numberLine(data, interval): # data = the list for which you want each value in expanded
  rounded = []
  for val in data: 
    round_item = 0
    count = 0
    while val > (interval - 1): # keep adding up the interval until you can't add another without going over val
      val -= interval
      count += 1 # count how many intervals were required
    round_item = count * interval # make list of vals 'rounded' to their lowest nearest interval
    rounded.append(round_item)
  x = [] # Make x, y list with results
  placeholder = []
  for val in rounded: # for every 'rounded' value, make a list of every number (with a resolution of the interval/100) in the interval
    placeholder = np.arange(val, (val + interval),(interval/100)).tolist()
    x = x + placeholder
  y = np.zeros(len(x)) # set the y values to all zeros for number line
  limit = max(x) # length of x-axis scale, but you don't have to use this if you want to set it manually while plotting
  return x,y,limit

这是我对分类变量的了解:(这是不完全正确的部分,尽管它运行并且要点在这里)

def numberLine_cat(data,bounds):
  dat = list(data) # make data column into a list and capitalize first letter
  #dat = [data.capitalize() for item in dat]
  #bounds = [bounds.capitalize() for item in bounds]
  placeholder = 0
  count = 0
  final = []
  for val in bounds: # for the entire set of possible categories, find those which data includes
    if val in dat: # each category becomes a 25-value set (ie cat1 = 0-100, cat2 = 100,200).
      placeholder = np.arange(((count * 10)+ 5), ((count*10)+15), .1).tolist() # if numberline looks 'dotted', change '1' to a fraction of tot numbers
      final = placeholder + final
      count += 1
    elif val not in dat:
      count +=1
  x = final
  y = np.zeros(len(final))
  limit = len(bounds)*10 # length of the x-axis scale is the total # categories * 100
  return x, y, limit

这里是如何绘制它,使用基于https://matplotlib.org/examples/ticks_and_spines/tick-locators.html的代码

# Define setup(ax, lim)
def setup(ax,lim): #ax stays the way it is, lim is the x axis max limit
    ax.spines['right'].set_color('none')
    ax.spines['left'].set_color('none')
    ax.yaxis.set_major_locator(ticker.NullLocator())
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('bottom')
    ax.tick_params(which='major', width=1.00)
    ax.tick_params(which='major', length=5)
    ax.tick_params(which='minor', width=0.75)
    ax.tick_params(which='minor', length=2.5)
    ax.set_xlim(0, lim)
    ax.set_ylim(0, 1)
    ax.patch.set_alpha(0.0)

x, y, limit = numberLine(sample_data['Precipitation'],50);
x1, y1,limit1 = numberLine(sample_data['Temperature'],15)

species_bounds = ['dog','tree','mouse','elephant','dinosaur','turtle','human','dolphin','flower','elk','moose']
x2, y2, limit2 = numberLine_cat(sample_data['Species'],species_bounds)

#@title
fig = plt.figure(figsize=(10, 2))
n = 3
ticklabelpad = mpl.rcParams['xtick.major.pad']

# Precip test
ax = plt.subplot(n, 1, 1) # Not sure why this is necessary..
setup(ax, limit)
ax.xaxis.set_major_locator(ticker.AutoLocator())
ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
#ax.text(0.0, 0.1, "AutoLocator()", fontsize=14, transform=ax.transAxes)
ax.scatter(x,y,linewidth = '2', clip_on=False); # can add zorder > 0 to hide axis within points
ax.annotate('Precip. (mm)', xy=(-.13,0.15), xytext=(-1,-ticklabelpad), ha='left', va='top', xycoords='axes fraction', textcoords='offset points')

# Temperature test
ax = plt.subplot(n, 1, 2) # Not sure why this is necessary..
setup(ax, limit1)
ax.xaxis.set_major_locator(ticker.AutoLocator())
ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
#ax.text(0.0, 0.1, "AutoLocator()", fontsize=14, transform=ax.transAxes)
ax.scatter(x1,y1,linewidth = '2', clip_on=False); # can add zorder > 0 to hide axis within points
ax.annotate('Temp (C)', xy=(-.1,0.15), xytext=(-1,-ticklabelpad), ha='left', va='top', xycoords='axes fraction', textcoords='offset points')

# Animal test
ax = plt.subplot(n, 1, 3) # Not sure why this is necessary..
setup(ax, limit2)
ax.xaxis.set_major_locator(ticker.AutoLocator())
ax.xaxis.set_minor_locator(ticker.AutoMinorLocator())
ax.set_xticklabels(species_bounds)
#ax.text(0.0, 0.1, "AutoLocator()", fontsize=14, transform=ax.transAxes)
ax.scatter(x2,y2,clip_on=False); # can add zorder > 0 to hide axis within points
ax.annotate('Animals', xy=(-.1,0.15), xytext=(-1,-ticklabelpad), ha='left', va='top', xycoords='axes fraction', textcoords='offset points')

它看起来是这样的:(最后一行应该只突出显示狗、猫、树、大象、人类、海豚和花)

上面用代码生成的数线图

非常感谢,任何想尝试一下的人!(这也是我的第一篇文章,所以如果您有任何提示,我会全力以赴。)

标签: pythonmatplotlibplotscatter-plotcategorical-data

解决方案


推荐阅读