From 789a02eef9524ddb154443cc11ce229ac08085f2 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Mon, 7 Nov 2022 10:24:45 -0600
Subject: [PATCH 01/12] FIX-#5200: Use squeeze parameter instead of
 SeriesGroupby

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index dcd990f13b2..8d6f6c2b628 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -425,13 +425,15 @@ def __getitem__(self, key):
         if is_list_like(key):
             make_dataframe = True
         else:
+            key = [key]
             if self._as_index:
                 make_dataframe = False
             else:
                 make_dataframe = True
-                key = [key]
+        internal_by = frozenset(self._internal_by)
+        cols_to_grab = internal_by.union(key)
+        key = [col for col in self._df.columns if col in cols_to_grab]
         if make_dataframe:
-            internal_by = frozenset(self._internal_by)
             if len(internal_by.intersection(key)) != 0:
                 ErrorMessage.missmatch_with_pandas(
                     operation="GroupBy.__getitem__",
@@ -443,8 +445,6 @@ def __getitem__(self, key):
                         + "df.groupby(df['by_column'].copy())['by_column']"
                     ),
                 )
-            cols_to_grab = internal_by.union(key)
-            key = [col for col in self._df.columns if col in cols_to_grab]
             return DataFrameGroupBy(
                 self._df[key],
                 drop=self._drop,
@@ -459,7 +459,8 @@ def __getitem__(self, key):
                 "Column lookups on GroupBy with arbitrary Series in by"
                 + " is not yet supported."
             )
-        return SeriesGroupBy(
+        kwargs["squeeze"] = True
+        return DataFrameGroupBy(
             self._df[key],
             drop=False,
             **kwargs,

From e48fdd4b980742c978a454dd17d7ce10f291b591 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Thu, 17 Nov 2022 14:36:30 -0600
Subject: [PATCH 02/12] Move code

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 8d6f6c2b628..d18e1509c70 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -459,8 +459,7 @@ def __getitem__(self, key):
                 "Column lookups on GroupBy with arbitrary Series in by"
                 + " is not yet supported."
             )
-        kwargs["squeeze"] = True
-        return DataFrameGroupBy(
+        return SeriesGroupBy(
             self._df[key],
             drop=False,
             **kwargs,
@@ -1195,6 +1194,24 @@ def groupby_on_multiple_columns(df, *args, **kwargs):
 
 @_inherit_docstrings(pandas.core.groupby.SeriesGroupBy)
 class SeriesGroupBy(SeriesGroupByCompat, DataFrameGroupBy):
+
+    def __init__(
+            self,
+            df,
+            by,
+            axis,
+            level,
+            as_index,
+            sort,
+            group_keys,
+            squeeze,
+            idx_name,
+            drop,
+            **kwargs,
+    ):
+        super(SeriesGroupBy, self).__init__(df, by, axis, level, as_index, sort, group_keys, squeeze, idx_name, drop, **kwargs)
+        self._squeeze = True
+
     @property
     def ndim(self):
         """

From 268f9da8dfc52c0e5575c1117b189c92951dde2d Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Tue, 22 Nov 2022 11:48:12 -0600
Subject: [PATCH 03/12] Lint

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index d18e1509c70..48df124c32b 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -1194,9 +1194,21 @@ def groupby_on_multiple_columns(df, *args, **kwargs):
 
 @_inherit_docstrings(pandas.core.groupby.SeriesGroupBy)
 class SeriesGroupBy(SeriesGroupByCompat, DataFrameGroupBy):
-
     def __init__(
-            self,
+        self,
+        df,
+        by,
+        axis,
+        level,
+        as_index,
+        sort,
+        group_keys,
+        squeeze,
+        idx_name,
+        drop,
+        **kwargs,
+    ):
+        super(SeriesGroupBy, self).__init__(
             df,
             by,
             axis,
@@ -1208,8 +1220,7 @@ def __init__(
             idx_name,
             drop,
             **kwargs,
-    ):
-        super(SeriesGroupBy, self).__init__(df, by, axis, level, as_index, sort, group_keys, squeeze, idx_name, drop, **kwargs)
+        )
         self._squeeze = True
 
     @property

From 1c9ff0ab67695f770324155339dc8866a5c05970 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Mon, 28 Nov 2022 10:50:36 -0600
Subject: [PATCH 04/12] Fix the bugs

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py    | 16 +++++++++++++++-
 modin/pandas/test/utils.py |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 48df124c32b..e1acc608cea 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -461,7 +461,7 @@ def __getitem__(self, key):
             )
         return SeriesGroupBy(
             self._df[key],
-            drop=False,
+            drop=True,
             **kwargs,
         )
 
@@ -1223,6 +1223,12 @@ def __init__(
         )
         self._squeeze = True
 
+    def _default_to_pandas(self, f, *args, **kwargs):
+        intermediate = super(SeriesGroupBy, self)._default_to_pandas(f, *args, **kwargs)
+        if not isinstance(intermediate, Series) and self._squeeze:
+            return intermediate.squeeze(axis=1)
+        return intermediate
+
     @property
     def ndim(self):
         """
@@ -1276,6 +1282,14 @@ def _iter(self):
                 for k in (sorted(group_ids) if self._sort else group_ids)
             )
 
+    def aggregate(self, func=None, *args, **kwargs):
+        if isinstance(func, (list, dict)):
+            self._squeeze = False
+        result = super(SeriesGroupBy, self).aggregate(func, *args, **kwargs)
+        self._squeeze = True
+        return result
+
+    agg = aggregate
 
 if IsExperimental.get():
     from modin.experimental.cloud.meta_magic import make_wrapped_class
diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
index 21565286309..9287607f8a8 100644
--- a/modin/pandas/test/utils.py
+++ b/modin/pandas/test/utils.py
@@ -615,6 +615,7 @@ def df_equals(df1, df2):
     elif isinstance(df1, np.recarray) and isinstance(df2, np.recarray):
         np.testing.assert_array_equal(df1, df2)
     else:
+        raise ValueError(f"type {type(df1)} not equal type {type(df2)}\n\n{df1}\n\n{df2}")
         if df1 != df2:
             np.testing.assert_almost_equal(df1, df2)
 

From d7c621e29cacf34fa24bcd176d1db216350ed873 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Mon, 28 Nov 2022 10:53:49 -0600
Subject: [PATCH 05/12] lint

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py    | 1 +
 modin/pandas/test/utils.py | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index e1acc608cea..b143c9b7536 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -1291,6 +1291,7 @@ def aggregate(self, func=None, *args, **kwargs):
 
     agg = aggregate
 
+
 if IsExperimental.get():
     from modin.experimental.cloud.meta_magic import make_wrapped_class
 
diff --git a/modin/pandas/test/utils.py b/modin/pandas/test/utils.py
index 156c1860a90..412ebed29bd 100644
--- a/modin/pandas/test/utils.py
+++ b/modin/pandas/test/utils.py
@@ -626,7 +626,6 @@ def df_equals(df1, df2):
     elif isinstance(df1, np.recarray) and isinstance(df2, np.recarray):
         np.testing.assert_array_equal(df1, df2)
     else:
-        raise ValueError(f"type {type(df1)} not equal type {type(df2)}\n\n{df1}\n\n{df2}")
         if df1 != df2:
             np.testing.assert_almost_equal(df1, df2)
 

From 79d57c723d2c01e45eb32c445ab9920eee00bac1 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Mon, 28 Nov 2022 10:59:38 -0600
Subject: [PATCH 06/12] Add docstring

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index b143c9b7536..a09d8892c17 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -1224,6 +1224,23 @@ def __init__(
         self._squeeze = True
 
     def _default_to_pandas(self, f, *args, **kwargs):
+        """
+        Execute function `f` in default-to-pandas way.
+
+        Parameters
+        ----------
+        f : callable
+            The function to apply to each group.
+        *args : list
+            Extra positional arguments to pass to `f`.
+        **kwargs : dict
+            Extra keyword arguments to pass to `f`.
+
+        Returns
+        -------
+        modin.pandas.Series or modin.pandas.DataFrame
+            A new Modin Series or DataFrame with the result of the pandas function.
+        """
         intermediate = super(SeriesGroupBy, self)._default_to_pandas(f, *args, **kwargs)
         if not isinstance(intermediate, Series) and self._squeeze:
             return intermediate.squeeze(axis=1)

From ab4948ab4d924e0521155075995f0ea9e11e68d9 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Mon, 28 Nov 2022 13:56:01 -0600
Subject: [PATCH 07/12] Fix bug

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index a09d8892c17..55bdffd1bb4 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -1243,7 +1243,7 @@ def _default_to_pandas(self, f, *args, **kwargs):
         """
         intermediate = super(SeriesGroupBy, self)._default_to_pandas(f, *args, **kwargs)
         if not isinstance(intermediate, Series) and self._squeeze:
-            return intermediate.squeeze(axis=1)
+            return intermediate.squeeze(axis=self._axis ^ 1)
         return intermediate
 
     @property

From 1e59dd58e9a674ee08b40f803d4a713f4e177e1f Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Wed, 30 Nov 2022 10:32:36 -0600
Subject: [PATCH 08/12] Fix squeeze issue

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 55bdffd1bb4..de5d01d9db9 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -1241,8 +1241,11 @@ def _default_to_pandas(self, f, *args, **kwargs):
         modin.pandas.Series or modin.pandas.DataFrame
             A new Modin Series or DataFrame with the result of the pandas function.
         """
+        squeeze = self._squeeze
+        self._squeeze = False
         intermediate = super(SeriesGroupBy, self)._default_to_pandas(f, *args, **kwargs)
-        if not isinstance(intermediate, Series) and self._squeeze:
+        self._squeeze = squeeze
+        if not isinstance(intermediate, Series) and squeeze:
             return intermediate.squeeze(axis=self._axis ^ 1)
         return intermediate
 

From e436f3e4cffc2282df7aa0cf1121b041df9845b9 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Wed, 30 Nov 2022 10:47:39 -0600
Subject: [PATCH 09/12] Fix

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index de5d01d9db9..df1e1c7c47c 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -687,8 +687,6 @@ def size(self):
                 if MODIN_UNNAMED_SERIES_LABEL in result.columns
                 else result
             )
-        elif isinstance(self._df, Series):
-            result.name = self._df.name
         else:
             result.name = None
         return result.fillna(0)
@@ -1265,6 +1263,14 @@ def ndim(self):
         """
         return 1  # ndim is always 1 for Series
 
+    def size(self):
+        intermediate = super(SeriesGroupBy, self).size()
+        if isinstance(self._df, Series):
+            intermediate.name = self._df.name
+        else:
+            intermediate.name = next(col_name for col_name in self._df.columns if col_name not in self._internal_by)
+        return intermediate
+
     @property
     def _iter(self):
         """

From d0c8b932738d3af21f353193c9e514b98a36b688 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Wed, 30 Nov 2022 11:19:46 -0600
Subject: [PATCH 10/12] Lint

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index df1e1c7c47c..c26e28b66e3 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -1268,7 +1268,11 @@ def size(self):
         if isinstance(self._df, Series):
             intermediate.name = self._df.name
         else:
-            intermediate.name = next(col_name for col_name in self._df.columns if col_name not in self._internal_by)
+            intermediate.name = next(
+                col_name
+                for col_name in self._df.columns
+                if col_name not in self._internal_by
+            )
         return intermediate
 
     @property

From 982847108ddbe46ac1342bc0d5d4b595ab8fc458 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Wed, 30 Nov 2022 12:43:00 -0600
Subject: [PATCH 11/12] Fixes

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index c26e28b66e3..3674141321b 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -1239,12 +1239,14 @@ def _default_to_pandas(self, f, *args, **kwargs):
         modin.pandas.Series or modin.pandas.DataFrame
             A new Modin Series or DataFrame with the result of the pandas function.
         """
-        squeeze = self._squeeze
-        self._squeeze = False
+        self._df = self._df[
+            next(
+                col_name
+                for col_name in self._df.columns
+                if col_name not in self._internal_by
+            )
+        ]
         intermediate = super(SeriesGroupBy, self)._default_to_pandas(f, *args, **kwargs)
-        self._squeeze = squeeze
-        if not isinstance(intermediate, Series) and squeeze:
-            return intermediate.squeeze(axis=self._axis ^ 1)
         return intermediate
 
     @property

From f1981652fa517a861554b3a0c1a7bea40542f5e3 Mon Sep 17 00:00:00 2001
From: Devin Petersohn <devin.petersohn@gmail.com>
Date: Wed, 30 Nov 2022 12:45:28 -0600
Subject: [PATCH 12/12] Cache old df

Signed-off-by: Devin Petersohn <devin.petersohn@gmail.com>
---
 modin/pandas/groupby.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py
index 3674141321b..a2c447da777 100644
--- a/modin/pandas/groupby.py
+++ b/modin/pandas/groupby.py
@@ -1239,6 +1239,7 @@ def _default_to_pandas(self, f, *args, **kwargs):
         modin.pandas.Series or modin.pandas.DataFrame
             A new Modin Series or DataFrame with the result of the pandas function.
         """
+        old_df = self._df
         self._df = self._df[
             next(
                 col_name
@@ -1247,6 +1248,7 @@ def _default_to_pandas(self, f, *args, **kwargs):
             )
         ]
         intermediate = super(SeriesGroupBy, self)._default_to_pandas(f, *args, **kwargs)
+        self._df = old_df
         return intermediate
 
     @property