Remove usage of numpy recarray

Structured numpy arrays are more fundamental than recarrays and sufficient in all cases. Supersedes #26664.
matplotlib · Sep 4, 2023 · 75186cd · 75186cd
1 parent 42336be
commit 75186cd
Show file tree

Hide file tree

Showing 7 changed files with 35 additions and 29 deletions.
diff --git a/galleries/examples/lines_bars_and_markers/fill_between_alpha.py b/galleries/examples/lines_bars_and_markers/fill_between_alpha.py
@@ -18,14 +18,14 @@
 import matplotlib.cbook as cbook
 
 # load up some sample financial data
-r = cbook.get_sample_data('goog.npz')['price_data'].view(np.recarray)
+r = cbook.get_sample_data('goog.npz')['price_data']
 # create two subplots with the shared x and y axes
 fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True, sharey=True)
 
-pricemin = r.close.min()
+pricemin = r["close"].min()
 
-ax1.plot(r.date, r.close, lw=2)
-ax2.fill_between(r.date, pricemin, r.close, alpha=0.7)
+ax1.plot(r["date"], r["close"], lw=2)
+ax2.fill_between(r["date"], pricemin, r["close"], alpha=0.7)
 
 for ax in ax1, ax2:
     ax.grid(True)

diff --git a/galleries/examples/lines_bars_and_markers/scatter_demo2.py b/galleries/examples/lines_bars_and_markers/scatter_demo2.py
@@ -14,14 +14,14 @@
 # low, close, volume, adj_close from the mpl-data/sample_data directory. The
 # record array stores the date as an np.datetime64 with a day unit ('D') in
 # the date column.
-price_data = cbook.get_sample_data('goog.npz')['price_data'].view(np.recarray)
+price_data = cbook.get_sample_data('goog.npz')['price_data']
 price_data = price_data[-250:]  # get the most recent 250 trading days
 
-delta1 = np.diff(price_data.adj_close) / price_data.adj_close[:-1]
+delta1 = np.diff(price_data["adj_close"]) / price_data["adj_close"][:-1]
 
 # Marker size in units of points^2
-volume = (15 * price_data.volume[:-2] / price_data.volume[0])**2
-close = 0.003 * price_data.close[:-2] / 0.003 * price_data.open[:-2]
+volume = (15 * price_data["volume"][:-2] / price_data["volume"][0])**2
+close = 0.003 * price_data["close"][:-2] / 0.003 * price_data["open"][:-2]
 
 fig, ax = plt.subplots()
 ax.scatter(delta1[:-1], delta1[1:], c=close, s=volume, alpha=0.5)

diff --git a/galleries/examples/misc/keyword_plotting.py b/galleries/examples/misc/keyword_plotting.py
@@ -3,13 +3,14 @@
 Plotting with keywords
 ======================
 
-There are some instances where you have data in a format that lets you
-access particular variables with strings: for example, with
-`numpy.recarray` or `pandas.DataFrame`.
+Some data structures, like dict, `structured numpy array
+<https://numpy.org/doc/stable/user/basics.rec.html#structured-arrays>`_
+or `pandas.DataFrame` provide access to labelled data via string index access
+``data[key]``.
 
-Matplotlib allows you to provide such an object with the ``data`` keyword
-argument. If provided, you may generate plots with the strings
-corresponding to these variables.
+For these data types, Matplotlib supports passing the whole data structure via the
+``data`` keyword argument, and using the string names as plot function parameters
+where you'd normally pass in your data.
 """
 
 import matplotlib.pyplot as plt

diff --git a/galleries/examples/ticks/centered_ticklabels.py b/galleries/examples/ticks/centered_ticklabels.py
@@ -18,18 +18,17 @@
 """
 
 import matplotlib.pyplot as plt
-import numpy as np
 
 import matplotlib.cbook as cbook
 import matplotlib.dates as dates
 import matplotlib.ticker as ticker
 
 # Load some financial data; Google's stock price
-r = cbook.get_sample_data('goog.npz')['price_data'].view(np.recarray)
+r = cbook.get_sample_data('goog.npz')['price_data']
 r = r[-250:]  # get the last 250 days
 
 fig, ax = plt.subplots()
-ax.plot(r.date, r.adj_close)
+ax.plot(r["date"], r["adj_close"])
 
 ax.xaxis.set_major_locator(dates.MonthLocator())
 # 16 is a slight approximation since months differ in number of days.
@@ -45,5 +44,5 @@
 for label in ax.get_xticklabels(minor=True):
     label.set_horizontalalignment('center')
 imid = len(r) // 2
-ax.set_xlabel(str(r.date[imid].item().year))
+ax.set_xlabel(str(r["date"][imid].item().year))
 plt.show()
diff --git a/galleries/examples/ticks/date_index_formatter.py b/galleries/examples/ticks/date_index_formatter.py
@@ -32,10 +32,10 @@
 fig.get_layout_engine().set(hspace=0.15)
 
 # First we'll do it the default way, with gaps on weekends
-ax1.plot(r.date, r.adj_close, 'o-')
+ax1.plot(r["date"], r["adj_close"], 'o-')
 
 # Highlight gaps in daily data
-gaps = np.flatnonzero(np.diff(r.date) > np.timedelta64(1, 'D'))
+gaps = np.flatnonzero(np.diff(r["date"]) > np.timedelta64(1, 'D'))
 for gap in r[['date', 'adj_close']][np.stack((gaps, gaps + 1)).T]:
     ax1.plot(gap.date, gap.adj_close, 'w--', lw=2)
 ax1.legend(handles=[ml.Line2D([], [], ls='--', label='Gaps in daily data')])
@@ -51,12 +51,12 @@
 def format_date(x, _):
     try:
         # convert datetime64 to datetime, and use datetime's strftime:
-        return r.date[round(x)].item().strftime('%a')
+        return r["date"][round(x)].item().strftime('%a')
     except IndexError:
         pass
 
 # Create an index plot (x defaults to range(len(y)) if omitted)
-ax2.plot(r.adj_close, 'o-')
+ax2.plot(r["adj_close"], 'o-')
 
 ax2.set_title("Plot y at Index Coordinates Using Custom Formatter")
 ax2.xaxis.set_major_formatter(format_date)  # internally creates FuncFormatter
@@ -79,6 +79,6 @@ def __call__(self, x, pos=0):
             pass
 
 
-ax2.xaxis.set_major_formatter(MyFormatter(r.date, '%a'))
+ax2.xaxis.set_major_formatter(MyFormatter(r["date"], '%a'))
 
 plt.show()
diff --git a/galleries/tutorials/pyplot.py b/galleries/tutorials/pyplot.py
@@ -106,8 +106,10 @@
 # =============================
 #
 # There are some instances where you have data in a format that lets you
-# access particular variables with strings. For example, with
-# `numpy.recarray` or `pandas.DataFrame`.
+# access particular variables with strings. For example, with `structured arrays`_
+# or `pandas.DataFrame`.
+#
+# .. _structured arrays: https://numpy.org/doc/stable/user/basics.rec.html#structured-arrays
 #
 # Matplotlib allows you to provide such an object with
 # the ``data`` keyword argument. If provided, then you may generate plots with

diff --git a/galleries/users_explain/quick_start.py b/galleries/users_explain/quick_start.py
@@ -126,10 +126,14 @@
 #   b = np.matrix([[1, 2], [3, 4]])
 #   b_asarray = np.asarray(b)
 #
-# Most methods will also parse an addressable object like a *dict*, a
-# `numpy.recarray`, or a `pandas.DataFrame`.  Matplotlib allows you to
-# provide the ``data`` keyword argument and generate plots passing the
-# strings corresponding to the *x* and *y* variables.
+# Most methods will also parse a string-indexable object like a *dict*, a
+# `structured numpy array
+# <https://numpy.org/doc/stable/user/basics.rec.html#structured-arrays>`_, or a
+# `pandas.DataFrame`.  Matplotlib allows you to provide the ``data`` keyword argument
+# and generate plots passing the strings corresponding to the *x* and *y* variables.
+#
+# .. _structured numpy array: https://numpy.org/doc/stable/user/basics.rec.html#structured-arrays
+
 np.random.seed(19680801)  # seed the random number generator.
 data = {'a': np.arange(50),
         'c': np.random.randint(0, 50, 50),