Move Several Series Function to Frame by isVoid · Pull Request #9394 · rapidsai/cudf · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Move Several Series Function to Frame #9394

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 15, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 121 additions & 0 deletions python/cudf/cudf/core/frame.py
<td class="blob-num blob-num-addition empty-cell">
Original file line number Diff line number Diff line change
Expand Up @@ -3487,6 +3487,127 @@ def sqrt(self):
"""
return self._unaryop("sqrt")

def abs(self):
    """
    Compute the element-wise absolute value.

    Intended for objects whose elements are all numeric. The work is
    delegated to ``self._unaryop`` with the operation name ``"abs"``.

    Returns
    -------
    DataFrame or Series
        Whatever ``self._unaryop("abs")`` produces: the absolute value
        of each element.

    Examples
    --------
    Absolute numeric values in a Series

    >>> import cudf
    >>> s = cudf.Series([-1.10, 2, -3.33, 4])
    >>> s.abs()
    0    1.10
    1    2.00
    2    3.33
    3    4.00
    dtype: float64
    """
    absolute = self._unaryop("abs")
    return absolute

# Rounding
def ceil(self):
    """
    Round every element up to the nearest integral value.

    Each value is replaced by the smallest integral value that is not
    less than it. The work is delegated to ``self._unaryop`` with the
    operation name ``"ceil"``.

    Returns
    -------
    DataFrame or Series
        Ceiling value of each element.

    Examples
    --------
    >>> import cudf
    >>> series = cudf.Series([1.1, 2.8, 3.5, 4.5])
    >>> series
    0    1.1
    1    2.8
    2    3.5
    3    4.5
    dtype: float64
    >>> series.ceil()
    0    2.0
    1    3.0
    2    4.0
    3    5.0
    dtype: float64
    """
    op_name = "ceil"
    return self._unaryop(op_name)

def floor(self):
    """
    Round every element down to the nearest integral value.

    Each value is replaced by the largest integral value that is not
    greater than it. The work is delegated to ``self._unaryop`` with
    the operation name ``"floor"``.

    Returns
    -------
    DataFrame or Series
        Floor value of each element.

    Examples
    --------
    >>> import cudf
    >>> series = cudf.Series([-1.9, 2, 0.2, 1.5, 0.0, 3.0])
    >>> series
    0   -1.9
    1    2.0
    2    0.2
    3    1.5
    4    0.0
    5    3.0
    dtype: float64
    >>> series.floor()
    0   -2.0
    1    2.0
    2    0.0
    3    1.0
    4    0.0
    5    3.0
    dtype: float64
    """
    floored = self._unaryop("floor")
    return floored

def scale(self):
    """
    Min-max scale the values to the range [0, 1].

    The result is computed as ``(self - min) / (max - min)``, where
    ``min`` and ``max`` come from ``self.min()`` and ``self.max()``.
    The returned object is given a shallow copy of the caller's index.

    Notes
    -----
    No special handling is done when ``max`` equals ``min``; the
    denominator is zero in that case.

    Returns
    -------
    DataFrame or Series
        Values scaled to [0, 1].

    Examples
    --------
    >>> import cudf
    >>> series = cudf.Series([10, 11, 12, 0.5, 1])
    >>> series
    0    10.0
    1    11.0
    2    12.0
    3     0.5
    4     1.0
    dtype: float64
    >>> series.scale()
    0    0.826087
    1    0.913043
    2    1.000000
    3    0.000000
    4    0.043478
    dtype: float64
    """
    lower, upper = self.min(), self.max()
    shifted = self - lower
    result = shifted / (upper - lower)
    # Re-attach the caller's index (shallow copy) to the freshly built result.
    result._index = self._index.copy(deep=False)
    return result

def _merge(
self,
right,
Expand Down
124 changes: 0 additions & 124 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3476,130 +3476,6 @@ def value_counts(
res = res / float(res._column.sum())
return res

def scale(self):
    """
    Rescale the series linearly so its values span [0, 1].

    Computes ``(self - self.min()) / (self.max() - self.min())`` and
    assigns a shallow copy of this series' index to the result.

    Returns
    -------
    Series
        A new series with values scaled to [0, 1].

    Examples
    --------
    >>> import cudf
    >>> series = cudf.Series([10, 11, 12, 0.5, 1])
    >>> series
    0    10.0
    1    11.0
    2    12.0
    3     0.5
    4     1.0
    dtype: float64
    >>> series.scale()
    0    0.826087
    1    0.913043
    2    1.000000
    3    0.000000
    4    0.043478
    dtype: float64
    """
    smallest = self.min()
    largest = self.max()
    offset = self - smallest
    normalized = offset / (largest - smallest)
    normalized._index = self._index.copy(deep=False)
    return normalized

# Absolute
def abs(self):
    """
    Absolute value of each element of the series.

    Delegates to ``self._unaryop`` with the operation name ``"abs"``.

    Returns
    -------
    Series
        Series containing the absolute value of each element.

    Examples
    --------
    >>> import cudf
    >>> series = cudf.Series([-1.10, 2, -3.33, 4])
    >>> series
    0   -1.10
    1    2.00
    2   -3.33
    3    4.00
    dtype: float64
    >>> series.abs()
    0    1.10
    1    2.00
    2    3.33
    3    4.00
    dtype: float64
    """
    op_name = "abs"
    return self._unaryop(op_name)

# Rounding
def ceil(self):
    """
    Ceiling of each element of the series.

    Every value is rounded upward to the smallest integral value not
    less than the original. Delegates to ``self._unaryop`` with the
    operation name ``"ceil"``.

    Returns
    -------
    Series
        A new Series with the ceiling value of each element.

    Examples
    --------
    >>> import cudf
    >>> series = cudf.Series([1.1, 2.8, 3.5, 4.5])
    >>> series
    0    1.1
    1    2.8
    2    3.5
    3    4.5
    dtype: float64
    >>> series.ceil()
    0    2.0
    1    3.0
    2    4.0
    3    5.0
    dtype: float64
    """
    ceiled = self._unaryop("ceil")
    return ceiled

def floor(self):
    """
    Floor of each element of the series.

    Every value is rounded downward to the largest integral value not
    greater than the original. Delegates to ``self._unaryop`` with the
    operation name ``"floor"``.

    Returns
    -------
    Series
        A new Series with the floor of each element.

    Examples
    --------
    >>> import cudf
    >>> series = cudf.Series([-1.9, 2, 0.2, 1.5, 0.0, 3.0])
    >>> series
    0   -1.9
    1    2.0
    2    0.2
    3    1.5
    4    0.0
    5    3.0
    dtype: float64
    >>> series.floor()
    0   -2.0
    1    2.0
    2    0.0
    3    1.0
    4    0.0
    5    3.0
    dtype: float64
    """
    op_name = "floor"
    return self._unaryop(op_name)

def hash_values(self, method="murmur3"):
"""Compute the hash of values in this column.

Expand Down
18 changes: 18 additions & 0 deletions python/cudf/cudf/tests/test_dataframe.py
6D4E
Original file line number Diff line number Diff line change
Expand Up @@ -2068,6 +2068,24 @@ def test_unaryops_df(pdf, gdf, unaryop):
assert_eq(d, g)


@pytest.mark.parametrize("unary_func", ["abs", "floor", "ceil"])
def test_unary_func_df(pdf, unary_func):
    """Check DataFrame ``abs``/``floor``/``ceil`` against the NumPy equivalents.

    The ``pdf`` fixture is shifted by -5 and perturbed with seeded random
    noise so the inputs include negative and fractional values; the cudf
    result is then compared with applying ``np.<unary_func>`` column-wise
    in pandas.
    """
    np.random.seed(0)
    # Build the noise on the frame's own row index so it lines up row by row.
    disturbance = pd.Series(np.random.rand(len(pdf)), index=pdf.index)
    # ``DataFrame + Series`` aligns the Series index with the frame's
    # *columns* by default, which (for a frame with named columns) produces
    # an all-NaN frame and makes the comparison vacuous. ``axis=0`` adds the
    # noise down the rows instead.
    # NOTE(review): assumes the ``pdf`` fixture has non-integer column
    # labels (e.g. "x"/"y") -- confirm against the fixture definition.
    pdf = (pdf - 5).add(disturbance, axis=0)
    d = pdf.apply(getattr(np, unary_func))
    g = getattr(cudf.from_pandas(pdf), unary_func)()
    assert_eq(d, g)


def test_scale_df(gdf):
    """Check that ``scale`` maps a shifted ``gdf`` onto an even [0, 1] grid.

    The expected frame has columns "x" and "y", each equal to
    ``np.linspace(0.0, 1.0, 10)``.
    """
    shifted = gdf - 5
    got = shifted.scale()
    expect = cudf.DataFrame(
        {col: np.linspace(0.0, 1.0, 10) for col in ("x", "y")}
    )
    assert_eq(expect, got)


@pytest.mark.parametrize(
"func",
[
Expand Down
0