I'm using matplotlib in python to build a scatter plot.
suppose I have the following 2 data lists.
X=[1,2,3,4,5]
Y=[6,7,8,9,10]
then I use X as the X-axis value and Y as the Y-axis value to make a scatter plot. So I will have a picture with 5 scattering points on it, right?
Now the question: is it possible to build connection for these 5 points with the actual data. For example, when I click on one of these 5 points, it can tell me what original data I have used to make this point?
thanks in advance
Using a slightly modified version of Joe Kington's DataCursor:
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
import matplotlib.cbook as cbook
import numpy as np
def fmt(x, y):
return 'x: {x:0.2f}\ny: {y:0.2f}'.format(x = x, y = y)
class DataCursor(object):
# https://stackoverflow.com/a/4674445/190597
"""A simple data cursor widget that displays the x,y location of a
matplotlib artist when it is selected."""
def __init__(self, artists, x = [], y = [], tolerance = 5, offsets = (-20, 20),
formatter = fmt, display_all = False):
"""Create the data cursor and connect it to the relevant figure.
"artists" is the matplotlib artist or sequence of artists that will be
selected.
"tolerance" is the radius (in points) that the mouse click must be
within to select the artist.
"offsets" is a tuple of (x,y) offsets in points from the selected
point to the displayed annotation box
"formatter" is a callback function which takes 2 numeric arguments and
returns a string
"display_all" controls whether more than one annotation box will
be shown if there are multiple axes. Only one will be shown
per-axis, regardless.
"""
self._points = np.column_stack((x,y))
self.formatter = formatter
self.offsets = offsets
self.display_all = display_all
if not cbook.iterable(artists):
artists = [artists]
self.artists = artists
self.axes = tuple(set(art.axes for art in self.artists))
self.figures = tuple(set(ax.figure for ax in self.axes))
self.annotations = {}
for ax in self.axes:
self.annotations[ax] = self.annotate(ax)
for artist in self.artists:
artist.set_picker(tolerance)
for fig in self.figures:
fig.canvas.mpl_connect('pick_event', self)
def annotate(self, ax):
"""Draws and hides the annotation box for the given axis "ax"."""
annotation = ax.annotate(self.formatter, xy = (0, 0), ha = 'right',
xytext = self.offsets, textcoords = 'offset points', va = 'bottom',
bbox = dict(boxstyle = 'round,pad=0.5', fc = 'yellow', alpha = 0.5),
arrowprops = dict(arrowstyle = '->', connectionstyle = 'arc3,rad=0')
)
annotation.set_visible(False)
return annotation
def snap(self, x, y):
"""Return the value in self._points closest to (x, y).
"""
idx = np.nanargmin(((self._points - (x,y))**2).sum(axis = -1))
return self._points[idx]
def __call__(self, event):
"""Intended to be called through "mpl_connect"."""
# Rather than trying to interpolate, just display the clicked coords
# This will only be called if it's within "tolerance", anyway.
x, y = event.mouseevent.xdata, event.mouseevent.ydata
annotation = self.annotations[event.artist.axes]
if x is not None:
if not self.display_all:
# Hide any other annotation boxes...
for ann in self.annotations.values():
ann.set_visible(False)
# Update the annotation in the current axis..
x, y = self.snap(x, y)
annotation.xy = x, y
annotation.set_text(self.formatter(x, y))
annotation.set_visible(True)
event.canvas.draw()
x=[1,2,3,4,5]
y=[6,7,8,9,10]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
scat = ax.scatter(x, y)
DataCursor(scat, x, y)
plt.show()
yields
You can click on any of the points and the balloon will show the underlying data values.
My slight modification to the DataCursor was to add the snap
method, which ensures that the data point displayed came from the original data set, rather than the location where the mouse actually clicked.
If you have scipy installed, you might prefer this version of the Cursor, which makes the balloon follow the mouse (without clicking):
import datetime as DT
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import numpy as np
import scipy.spatial as spatial
def fmt(x, y, is_date):
if is_date:
x = mdates.num2date(x).strftime("%Y-%m-%d")
return 'x: {x}\ny: {y}'.format(x=x, y=y)
else:
return 'x: {x:0.2f}\ny: {y:0.2f}'.format(x=x, y=y)
class FollowDotCursor(object):
"""Display the x,y location of the nearest data point."""
def __init__(self, ax, x, y, tolerance=5, formatter=fmt, offsets=(-20, 20)):
try:
x = np.asarray(x, dtype='float')
self.is_date = False
except (TypeError, ValueError):
x = np.asarray(mdates.date2num(x), dtype='float')
self.is_date = True
y = np.asarray(y, dtype='float')
self._points = np.column_stack((x, y))
self.offsets = offsets
self.scale = x.ptp()
self.scale = y.ptp() / self.scale if self.scale else 1
self.tree = spatial.cKDTree(self.scaled(self._points))
self.formatter = formatter
self.tolerance = tolerance
self.ax = ax
self.fig = ax.figure
self.ax.xaxis.set_label_position('top')
self.dot = ax.scatter(
[x.min()], [y.min()], s=130, color='green', alpha=0.7)
self.annotation = self.setup_annotation()
plt.connect('motion_notify_event', self)
def scaled(self, points):
points = np.asarray(points)
return points * (self.scale, 1)
def __call__(self, event):
ax = self.ax
# event.inaxes is always the current axis. If you use twinx, ax could be
# a different axis.
if event.inaxes == ax:
x, y = event.xdata, event.ydata
elif event.inaxes is None:
return
else:
inv = ax.transData.inverted()
x, y = inv.transform([(event.x, event.y)]).ravel()
annotation = self.annotation
x, y = self.snap(x, y)
annotation.xy = x, y
annotation.set_text(self.formatter(x, y, self.is_date))
self.dot.set_offsets((x, y))
bbox = ax.viewLim
event.canvas.draw()
def setup_annotation(self):
"""Draw and hide the annotation box."""
annotation = self.ax.annotate(
'', xy=(0, 0), ha = 'right',
xytext = self.offsets, textcoords = 'offset points', va = 'bottom',
bbox = dict(
boxstyle='round,pad=0.5', fc='yellow', alpha=0.75),
arrowprops = dict(
arrowstyle='->', connectionstyle='arc3,rad=0'))
return annotation
def snap(self, x, y):
"""Return the value in self.tree closest to x, y."""
dist, idx = self.tree.query(self.scaled((x, y)), k=1, p=1)
try:
return self._points[idx]
except IndexError:
# IndexError: index out of bounds
return self._points[0]
x = [DT.date.today()+DT.timedelta(days=i) for i in [10,20,30,40,50]]
y = [6,7,8,9,10]
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.scatter(x, y)
cursor = FollowDotCursor(ax, x, y)
fig.autofmt_xdate()
plt.show()