Skip to content

Commit

Permalink
Remove usage of numpy recarray
Browse files Browse the repository at this point in the history
Structured numpy arrays are more fundamental than recarrays
and sufficient in all cases.

Supersedes #26664.
  • Loading branch information
timhoffm committed Sep 4, 2023
1 parent 42336be commit 75186cd
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,14 @@
import matplotlib.cbook as cbook

# load up some sample financial data
r = cbook.get_sample_data('goog.npz')['price_data'].view(np.recarray)
r = cbook.get_sample_data('goog.npz')['price_data']
# create two subplots with the shared x and y axes
fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True, sharey=True)

pricemin = r.close.min()
pricemin = r["close"].min()

ax1.plot(r.date, r.close, lw=2)
ax2.fill_between(r.date, pricemin, r.close, alpha=0.7)
ax1.plot(r["date"], r["close"], lw=2)
ax2.fill_between(r["date"], pricemin, r["close"], alpha=0.7)

for ax in ax1, ax2:
ax.grid(True)
Expand Down
8 changes: 4 additions & 4 deletions galleries/examples/lines_bars_and_markers/scatter_demo2.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
# low, close, volume, adj_close from the mpl-data/sample_data directory. The
# structured array stores the date as an np.datetime64 with a day unit ('D') in
# the date column.
price_data = cbook.get_sample_data('goog.npz')['price_data'].view(np.recarray)
price_data = cbook.get_sample_data('goog.npz')['price_data']
price_data = price_data[-250:] # get the most recent 250 trading days

delta1 = np.diff(price_data.adj_close) / price_data.adj_close[:-1]
delta1 = np.diff(price_data["adj_close"]) / price_data["adj_close"][:-1]

# Marker size in units of points^2
volume = (15 * price_data.volume[:-2] / price_data.volume[0])**2
close = 0.003 * price_data.close[:-2] / 0.003 * price_data.open[:-2]
volume = (15 * price_data["volume"][:-2] / price_data["volume"][0])**2
close = 0.003 * price_data["close"][:-2] / 0.003 * price_data["open"][:-2]

fig, ax = plt.subplots()
ax.scatter(delta1[:-1], delta1[1:], c=close, s=volume, alpha=0.5)
Expand Down
13 changes: 7 additions & 6 deletions galleries/examples/misc/keyword_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
Plotting with keywords
======================
There are some instances where you have data in a format that lets you
access particular variables with strings: for example, with
`numpy.recarray` or `pandas.DataFrame`.
Some data structures, like dict, `structured numpy array
<https://numpy.org/doc/stable/user/basics.rec.html#structured-arrays>`_
or `pandas.DataFrame` provide access to labelled data via string index access
``data[key]``.
Matplotlib allows you to provide such an object with the ``data`` keyword
argument. If provided, you may generate plots with the strings
corresponding to these variables.
For these data types, Matplotlib supports passing the whole data structure via the
``data`` keyword argument and using the string names as plot function parameters
where you'd normally pass in your data.
"""

import matplotlib.pyplot as plt
Expand Down
7 changes: 3 additions & 4 deletions galleries/examples/ticks/centered_ticklabels.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,17 @@
"""

import matplotlib.pyplot as plt
import numpy as np

import matplotlib.cbook as cbook
import matplotlib.dates as dates
import matplotlib.ticker as ticker

# Load some financial data; Google's stock price
r = cbook.get_sample_data('goog.npz')['price_data'].view(np.recarray)
r = cbook.get_sample_data('goog.npz')['price_data']
r = r[-250:] # get the last 250 days

fig, ax = plt.subplots()
ax.plot(r.date, r.adj_close)
ax.plot(r["date"], r["adj_close"])

ax.xaxis.set_major_locator(dates.MonthLocator())
# 16 is a slight approximation since months differ in number of days.
Expand All @@ -45,5 +44,5 @@
for label in ax.get_xticklabels(minor=True):
label.set_horizontalalignment('center')
imid = len(r) // 2
ax.set_xlabel(str(r.date[imid].item().year))
ax.set_xlabel(str(r["date"][imid].item().year))
plt.show()
10 changes: 5 additions & 5 deletions galleries/examples/ticks/date_index_formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@
fig.get_layout_engine().set(hspace=0.15)

# First we'll do it the default way, with gaps on weekends
ax1.plot(r.date, r.adj_close, 'o-')
ax1.plot(r["date"], r["adj_close"], 'o-')

# Highlight gaps in daily data
gaps = np.flatnonzero(np.diff(r.date) > np.timedelta64(1, 'D'))
gaps = np.flatnonzero(np.diff(r["date"]) > np.timedelta64(1, 'D'))
for gap in r[['date', 'adj_close']][np.stack((gaps, gaps + 1)).T]:
ax1.plot(gap.date, gap.adj_close, 'w--', lw=2)
ax1.legend(handles=[ml.Line2D([], [], ls='--', label='Gaps in daily data')])
Expand All @@ -51,12 +51,12 @@
def format_date(x, _):
try:
# convert datetime64 to datetime, and use datetime's strftime:
return r.date[round(x)].item().strftime('%a')
return r["date"][round(x)].item().strftime('%a')
except IndexError:
pass

# Create an index plot (x defaults to range(len(y)) if omitted)
ax2.plot(r.adj_close, 'o-')
ax2.plot(r["adj_close"], 'o-')

ax2.set_title("Plot y at Index Coordinates Using Custom Formatter")
ax2.xaxis.set_major_formatter(format_date) # internally creates FuncFormatter
Expand All @@ -79,6 +79,6 @@ def __call__(self, x, pos=0):
pass


ax2.xaxis.set_major_formatter(MyFormatter(r.date, '%a'))
ax2.xaxis.set_major_formatter(MyFormatter(r["date"], '%a'))

plt.show()
6 changes: 4 additions & 2 deletions galleries/tutorials/pyplot.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,10 @@
# =============================
#
# There are some instances where you have data in a format that lets you
# access particular variables with strings. For example, with
# `numpy.recarray` or `pandas.DataFrame`.
# access particular variables with strings. For example, with `structured arrays`_
# or `pandas.DataFrame`.
#
# .. _structured arrays: https://numpy.org/doc/stable/user/basics.rec.html#structured-arrays
#
# Matplotlib allows you to provide such an object with
# the ``data`` keyword argument. If provided, then you may generate plots with
Expand Down
12 changes: 8 additions & 4 deletions galleries/users_explain/quick_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,14 @@
# b = np.matrix([[1, 2], [3, 4]])
# b_asarray = np.asarray(b)
#
# Most methods will also parse an addressable object like a *dict*, a
# `numpy.recarray`, or a `pandas.DataFrame`. Matplotlib allows you to
# provide the ``data`` keyword argument and generate plots passing the
# strings corresponding to the *x* and *y* variables.
# Most methods will also parse a string-indexable object like a *dict*, a
# `structured numpy array
# <https://numpy.org/doc/stable/user/basics.rec.html#structured-arrays>`_, or a
# `pandas.DataFrame`. Matplotlib allows you to provide the ``data`` keyword argument
# and generate plots passing the strings corresponding to the *x* and *y* variables.

Check failure on line 135 in galleries/users_explain/quick_start.py

View workflow job for this annotation

GitHub Actions / flake8

[flake8] reported by reviewdog 🐶 E501 line too long (97 > 88 characters) Raw Output: ./galleries/users_explain/quick_start.py:135:89: E501 line too long (97 > 88 characters)

np.random.seed(19680801) # seed the random number generator.
data = {'a': np.arange(50),
'c': np.random.randint(0, 50, 50),
Expand Down

0 comments on commit 75186cd

Please sign in to comment.