BUG: Interchange object data buffer has the wrong dtype / from_dataframe incorrect #55227

Merged
60 changes: 37 additions & 23 deletions pandas/core/interchange/from_dataframe.py
@@ -266,21 +266,29 @@ def string_column_to_ndarray(col: Column) -> tuple[np.ndarray, Any]:

assert buffers["offsets"], "String buffers must contain offsets"
# Retrieve the data buffer containing the UTF-8 code units
data_buff, protocol_data_dtype = buffers["data"]
# We're going to reinterpret the buffer as uint8, so make sure we can do it safely
assert protocol_data_dtype[1] == 8
Contributor:

This assertion can indeed be deleted, as we can assume bit width 8 if the column dtype is STRING or LARGE_STRING.

assert protocol_data_dtype[2] in (
ArrowCTypes.STRING,
ArrowCTypes.LARGE_STRING,
) # format_str == utf-8
Contributor:

This assertion is valid, but it should be on col.dtype[2] rather than protocol_data_dtype[2]

# Convert the buffers to NumPy arrays. In order to go from STRING to
# an equivalent ndarray, we claim that the buffer is uint8 (i.e., a byte array)
data_dtype = (
DtypeKind.UINT,
8,
ArrowCTypes.UINT8,
Endianness.NATIVE,
)
data_buff, data_dtype = buffers["data"]
Contributor:

We can simply ignore the data dtype here, as we know what it needs to be (we set it later).

Suggested change
data_buff, data_dtype = buffers["data"]
data_buff, _ = buffers["data"]


if (data_dtype[1] == 8) and (
Member:

Out of curiosity what is the 8 here supposed to represent?

Member Author:

hey - from https://data-apis.org/dataframe-protocol/latest/API.html, it's the number of bits:

    @property
    @abstractmethod
    def dtype(self) -> Dtype:
        """
        Dtype description as a tuple ``(kind, bit-width, format string, endianness)``.

        Bit-width : the number of bits as an integer
        Format string : data type description format string in Apache Arrow C
                        Data Interface format.
        Endianness : current only native endianness (``=``) is supported
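The four-element tuple described above can be inspected directly through the interchange protocol. A minimal sketch (the column name "a" is just an example, not part of the PR):

```python
import pandas as pd

# An interchange column's dtype is (kind, bit-width, format string, endianness).
df = pd.DataFrame({"a": ["foo", "bar"]})
col = df.__dataframe__().get_column_by_name("a")
kind, bit_width, fmt, endianness = col.dtype
print(kind, bit_width, fmt, endianness)
```

For a pandas string column the bit width is 8 and the endianness is native (`=`), which is the assumption the assertions in this diff relied on.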

Member (@WillAyd, Sep 22, 2023):

Are string dtypes supposed to have an 8 bit association? That is kind of confusing for variable length types, granted I know very little of how this interchange is supposed to work

Member Author:

I think the idea is that strings are meant to be UTF-8, and so each character can be represented with 8 bits

Member:

Hmm interesting. Well keep in mind that UTF-8 doesn't mean a character is 8 bits; it is still 1-4 bytes

In arrow-adbc I've seen this assigned the value of 0

https://github.com/apache/arrow-adbc/blob/0d8707a5ee2622ba959b069cd173bfe6ee2aaff3/c/driver/postgresql/statement.cc#L225

Member:

The UTF-8 array consists of the buffer with the actual string data plus an offsets array. The data buffer holds all the string bytes concatenated (so it is typically much longer than the logical length of the array) and can be seen as simply a buffer of bytes (a "bytearray"); in numpy / buffer-interface terms you can therefore view such an array as one with bit width 8.

In arrow-adbc I've seen this assigned the value of 0

That's something postgres specific, I think

data_dtype[2]
in (
ArrowCTypes.STRING,
ArrowCTypes.LARGE_STRING,
)
): # format_str == utf-8
# temporary workaround to keep backwards compatibility due to
# https://github.com/pandas-dev/pandas/issues/54781

# We're going to reinterpret the buffer as uint8, so make sure we can do it
# safely

# Convert the buffers to NumPy arrays. In order to go from STRING to
# an equivalent ndarray, we claim that the buffer is uint8 (i.e., a byte array)
data_dtype = (
DtypeKind.UINT,
8,
ArrowCTypes.UINT8,
Endianness.NATIVE,
)
Contributor:

This does not need to be in an if-block. We can simply disregard the data buffer dtype - for string columns, this will ALWAYS be as listed here. This was already the case in the previous code! Only the assertions were wrong.

# Specify zero offset as we don't want to chunk the string data
data = buffer_to_ndarray(data_buff, data_dtype, offset=0, length=data_buff.bufsize)
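The reinterpretation performed above — treating the string data buffer as a plain uint8 byte array and slicing it with the offsets — can be sketched in plain NumPy (illustrative values, not the pandas internals):

```python
import numpy as np

# Arrow-style variable-length strings: one flat buffer of UTF-8 bytes plus an
# offsets array; string i spans data[offsets[i]:offsets[i+1]].
data = np.frombuffer(b"foobar", dtype=np.uint8)  # the buffer viewed as uint8
offsets = np.array([0, 3, 6], dtype=np.int64)
strings = [
    data[offsets[i] : offsets[i + 1]].tobytes().decode("utf-8")
    for i in range(len(offsets) - 1)
]
# strings == ["foo", "bar"]
```

This is why the data buffer's advertised dtype can be disregarded for string columns: the bytes are always reinterpreted as uint8 regardless.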

@@ -378,16 +386,22 @@ def datetime_column_to_ndarray(col: Column) -> tuple[np.ndarray | pd.Series, Any
buffers = col.get_buffers()

_, _, format_str, _ = col.dtype
dbuf, dtype = buffers["data"]
# Consider dtype being `uint` to get number of units passed since the 01.01.1970
dbuf, data_dtype = buffers["data"]

if data_dtype[0] == DtypeKind.DATETIME:
# temporary workaround to keep backwards compatibility due to
# https://github.com/pandas-dev/pandas/issues/54781
# Consider dtype being `int` to get number of units passed since 1970-01-01
data_dtype = (
DtypeKind.INT,
data_dtype[1],
getattr(ArrowCTypes, f"INT{data_dtype[1]}"),
Endianness.NATIVE,
)

data = buffer_to_ndarray(
dbuf,
(
DtypeKind.UINT,
dtype[1],
getattr(ArrowCTypes, f"UINT{dtype[1]}"),
Endianness.NATIVE,
Contributor:

This code was fine, but needs to be DtypeKind.INT as you have found, and use col.dtype[1] rather than dtype[1].

),
data_dtype,
offset=col.offset,
length=col.size(),
)
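The switch from UINT to INT in the hunk above matters for pre-epoch timestamps, which are negative unit counts. A minimal NumPy sketch (not the pandas code path itself):

```python
import numpy as np

# A datetime64 buffer is just 64-bit integers counting units since 1970-01-01.
# Viewing it as signed int preserves pre-epoch values; an unsigned view wraps them.
ts = np.array(["1969-12-31", "1970-01-02"], dtype="datetime64[s]")
as_int = ts.view(np.int64)    # [-86400, 86400]
as_uint = ts.view(np.uint64)  # the negative value wraps to a huge positive one
```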
22 changes: 22 additions & 0 deletions pandas/tests/interchange/test_impl.py
@@ -14,6 +14,7 @@
DtypeKind,
)
from pandas.core.interchange.from_dataframe import from_dataframe
from pandas.core.interchange.utils import ArrowCTypes


@pytest.fixture
@@ -326,3 +327,24 @@ def test_interchange_from_non_pandas_tz_aware():
dtype="datetime64[us, Asia/Kathmandu]",
)
tm.assert_frame_equal(expected, result)


def test_interchange_from_corrected_buffer_dtypes(monkeypatch) -> None:
# https://github.com/pandas-dev/pandas/issues/54781
df = pd.DataFrame({"a": ["foo", "bar"]})
interchange = df.__dataframe__()
column = interchange.get_column_by_name("a")
buffers = column.get_buffers()
Member (@WillAyd, Oct 24, 2023):

Not a blocker for this PR but I think these tests would be more impactful if we made the PandasBuffer implement the buffer protocol:

https://docs.cython.org/en/latest/src/userguide/buffer.html

That way we could inspect the bytes for tests

Member:

Started this in #55671

buffers_data = buffers["data"]
buffer_dtype = buffers_data[1]
Member:

Feel free to ignore my possibly wrong commentary as I'm new to this, but I think the offset buffers don't have the proper bufsize here either

(Pdb) buffers["offsets"]
(PandasBuffer({'bufsize': 24, 'ptr': 94440192356160, 'device': 'CPU'}), (<DtypeKind.INT: 0>, 64, 'l', '='))

The standard StringType which inherits from BinaryType in arrow uses a 32 bit offset value, so I think that bufsize should only be 12, unless we are mapping to a LargeString intentionally

Member Author:

thanks for looking into this - looks like it comes from

offsets = np.zeros(shape=(len(values) + 1,), dtype=np.int64)

where the dtype's being set to int64. OK to discuss/address this separately?

buffer_dtype = (
DtypeKind.UINT,
8,
ArrowCTypes.UINT8,
buffer_dtype[3],
)
buffers["data"] = (buffers_data[0], buffer_dtype)
column.get_buffers = lambda: buffers
interchange.get_column_by_name = lambda _: column
monkeypatch.setattr(df, "__dataframe__", lambda allow_copy: interchange)
pd.api.interchange.from_dataframe(df)
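The offsets-width observation raised in the review above can be checked directly: Arrow's string type uses 32-bit offsets while large_string uses 64-bit ones, so for two strings (len(values) + 1 = 3 offsets) the buffer sizes differ. A NumPy sketch, not the interchange code itself:

```python
import numpy as np

# Three offsets bound two strings.
offsets32 = np.zeros(3, dtype=np.int32)  # Arrow "string": 4-byte offsets
offsets64 = np.zeros(3, dtype=np.int64)  # Arrow "large_string": 8-byte offsets
# offsets32.nbytes == 12, offsets64.nbytes == 24 (the bufsize seen in the pdb output)
```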