Improve fmap for pandas

Resolves #734.
evhub · Apr 30, 2023 · b81719e · b81719e
1 parent 23d4959
commit b81719e
Show file tree

Hide file tree

Showing 4 changed files with 14 additions and 11 deletions.
diff --git a/DOCS.md b/DOCS.md
@@ -3168,6 +3168,8 @@ For `dict`, or any other `collections.abc.Mapping`, `fmap` will map over the map
 
 For [`numpy`](#numpy-integration) objects, `fmap` will use [`np.vectorize`](https://docs.scipy.org/doc/numpy/reference/generated/numpy.vectorize.html) to produce the result.
 
+For [`pandas`](https://pandas.pydata.org/) objects, `fmap` will use [`.apply`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html) along the last axis (so row-wise for a `DataFrame` and element-wise for a `Series`).
+
 For asynchronous iterables, `fmap` will map asynchronously, making `fmap` equivalent in that case to
 ```coconut_python
 async def fmap_over_async_iters(func, async_iter):
@@ -3198,7 +3200,7 @@ _Can't be done without a series of method definitions for each data type. See th
 
 **call**(_func_, /, *_args_, \*\*_kwargs_)
 
-Coconut's `call` simply implements function application. Thus, `call` is equivalent to
+Coconut's `call` simply implements function application. Thus, `call` is effectively equivalent to
 ```coconut
 def call(f, /, *args, **kwargs) = f(*args, **kwargs)
 ```

diff --git a/coconut/compiler/templates/header.py_template b/coconut/compiler/templates/header.py_template
@@ -1486,16 +1486,15 @@ def fmap(func, obj, **kwargs):
             if result is not _coconut.NotImplemented:
                 return result
     obj_module = _coconut_get_base_module(obj)
+    if obj_module in _coconut.pandas_numpy_modules:
+        if obj.ndim <= 1:
+            return obj.apply(func)
+        return obj.apply(func, axis=obj.ndim-1)
     if obj_module in _coconut.jax_numpy_modules:
         import jax.numpy as jnp
         return jnp.vectorize(func)(obj)
     if obj_module in _coconut.numpy_modules:
-        got = _coconut.numpy.vectorize(func)(obj)
-        if obj_module in _coconut.pandas_numpy_modules:
-            new_obj = obj.copy()
-            new_obj[:] = got
-            return new_obj
-        return got
+        return _coconut.numpy.vectorize(func)(obj)
     obj_aiter = _coconut.getattr(obj, "__aiter__", None)
     if obj_aiter is not None and _coconut_amap is not None:
         try:

diff --git a/coconut/root.py b/coconut/root.py
@@ -26,7 +26,7 @@
 VERSION = "3.0.0"
 VERSION_NAME = None
 # False for release, int >= 1 for develop
-DEVELOP = 39
+DEVELOP = 40
 ALPHA = True  # for pre releases rather than post releases
 
 # -----------------------------------------------------------------------------------------------------------------------

diff --git a/coconut/tests/src/extras.coco b/coconut/tests/src/extras.coco
@@ -472,9 +472,9 @@ def test_pandas() -> bool:
     assert [d1; d1].keys() |> list == ["nums", "chars"] * 2  # type: ignore
     assert [d1;; d1].itertuples() |> list == [(0, 1, 'a'), (1, 2, 'b'), (2, 3, 'c'), (0, 1, 'a'), (1, 2, 'b'), (2, 3, 'c')]  # type: ignore
     d2 = pd.DataFrame({"a": range(3) |> list, "b": range(1, 4) |> list})
-    new_d2 = d2 |> fmap$(.+1)
-    assert new_d2["a"] |> list == range(1, 4) |> list
-    assert new_d2["b"] |> list == range(2, 5) |> list
+    d3 = d2 |> fmap$(fmap$(.+1))
+    assert d3["a"] |> list == range(1, 4) |> list
+    assert d3["b"] |> list == range(2, 5) |> list
     assert multi_enumerate(d1) |> list == [((0, 0), 1), ((1, 0), 2), ((2, 0), 3), ((0, 1), 'a'), ((1, 1), 'b'), ((2, 1), 'c')]
     assert not all_equal(d1)
     assert not all_equal(d2)
@@ -489,6 +489,8 @@ def test_pandas() -> bool:
         3; 'b';;
         3; 'c';;
     ], dtype=object)  # type: ignore
+    d4 = d1 |> fmap$(def r -> r["nums2"] = r["nums"]*2; r)
+    assert (d4["nums"] * 2 == d4["nums2"]).all()
     return True