From 4f2a6bea82f4202c5a847e14009bb5ab244266ba Mon Sep 17 00:00:00 2001 From: nkorinek Date: Thu, 13 Jul 2023 13:23:34 -0600 Subject: [PATCH 1/2] Custom file name for file download --- CHANGELOG.rst | 1 + earthpy/io.py | 23 +++++++++++++- earthpy/tests/test_io.py | 67 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index c6bc34b7..98250b60 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,7 @@ The format is based on `Keep a Changelog ` unreleased ---------- +- Added a custom file name argument to get_data (@nkorinek, #891) - Update contributors to EarthPy (@nkorinek, #886) - Fix issue with Codecov (@nkorinek, #885) - Update dependencies, fix tests, and upgrade to support Python 3.8, 3.9, and 3.10 (@nkorinek, #878) diff --git a/earthpy/io.py b/earthpy/io.py index c7f2c92c..ca53e508 100644 --- a/earthpy/io.py +++ b/earthpy/io.py @@ -150,7 +150,9 @@ def __repr__(self): s = "Available Datasets: {}".format(self.data_keys) return s - def get_data(self, key=None, url=None, replace=False, verbose=True): + def get_data( + self, key=None, url=None, replace=False, verbose=True, file_name=None + ): """ Retrieve the data for a given week and return its path. @@ -194,6 +196,13 @@ def get_data(self, key=None, url=None, replace=False, verbose=True): "The `url` and `key` parameters can not both be " "set at the same time." ) + + if key is not None and file_name is not None: + raise ValueError( + "The `key` and `file_name` parameters can not both " + "be set at the same time." + ) + if key is None and url is None: print(self.__repr__()) return @@ -225,6 +234,18 @@ def get_data(self, key=None, url=None, replace=False, verbose=True): if fname.endswith(ext): file_type = ext + if file_name is not None: + if "." in file_name: + raise ValueError( + "File type extension found in file_name, do not " + "include file type extension in file_name." + ) + if file_type == "file": + ext = fname.split(".")[1] + else: + ext = file_type + fname = "{}.{}".format(file_name, ext) + # remove extension for pretty download paths fname = re.sub("\\.{}$".format(file_type), "", fname) diff --git a/earthpy/tests/test_io.py b/earthpy/tests/test_io.py index 3058fa9f..93d88c5e 100644 --- a/earthpy/tests/test_io.py +++ b/earthpy/tests/test_io.py @@ -116,6 +116,12 @@ def test_key_and_url_set_simultaneously(eld): eld.get_data(key="foo", url="bar") +def test_key_and_file_name_set_simultaneously(eld): + """key and file name should not both be set.""" + with pytest.raises(ValueError, match="can not both be set at the same"): + eld.get_data(key="foo", file_name="bar") + + def test_available_datasets_are_printed(eld, capsys): """If no key or url provided, print datasets. @@ -245,3 +251,64 @@ def test_url_download_with_quotes(eld): path = eld.get_data(url=quotes_url) files = os.listdir(path) assert "City_of_Boulder_City_Limits.shp" in files and os.path.isdir(path) + + +@skip_on_ci +@pytest.mark.vcr() +def test_arbitrary_url_zip_download_custom_fname(eld): + """Verify custom file_name works with zip download.""" + path = eld.get_data( + url=( + "https://www2.census.gov/geo/tiger/GENZ2016/shp" + "/cb_2016_us_nation_20m.zip" + ), + file_name="test_name_zip", + ) + path_has_contents = len(os.listdir(path)) > 0 + assert path_has_contents and "test_name_zip" in path + + +@skip_on_ci +@pytest.mark.vcr() +def test_url_download_tar_file_custom_fname(eld): + """Verify custom file_name works with tar download.""" + path = eld.get_data( + url="https://ndownloader.figshare.com/files/14615411", + file_name="test_name_tar", + ) + assert "abc.txt" in os.listdir(path) and "test_name_tar" in path + + +@skip_on_ci +@pytest.mark.vcr() +def test_url_download_tar_gz_file_custom_fname(eld): + """Verify custom file_name works with tar_gz download.""" + path = eld.get_data( + url="https://ndownloader.figshare.com/files/14615414", + file_name="test_name_targz", + ) + assert "abc.txt" in os.listdir(path) and "test_name_targz" in path + + +@skip_on_ci +@pytest.mark.vcr() +def test_url_download_txt_file_with_content_disposition_custom_fname(eld): + """Verify custom file_name works with arbitrary file_type download.""" + path = eld.get_data( + url="https://ndownloader.figshare.com/files/7275959", + file_name="test_csv", + ) + assert path.endswith("test_csv.csv") and os.path.isfile(path) + + +@skip_on_ci +@pytest.mark.vcr() +def test_file_name_with_extension_fails(eld): + """Test that including an extension in the file_name argument fails.""" + with pytest.raises( + ValueError, match="File type extension found in file_name" + ): + eld.get_data( + url="https://ndownloader.figshare.com/files/7275959", + file_name="test_csv.csv", + ) From 4ff96db768702014d0dc2e67ecff45c33f5e3c15 Mon Sep 17 00:00:00 2001 From: nkorinek Date: Thu, 13 Jul 2023 17:52:43 -0600 Subject: [PATCH 2/2] Added doc string for file_name argument --- earthpy/io.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/earthpy/io.py b/earthpy/io.py index ca53e508..954b73c8 100644 --- a/earthpy/io.py +++ b/earthpy/io.py @@ -173,6 +173,9 @@ def get_data( already downloaded. verbose : bool Whether to print verbose output while downloading files. + file_name : string + Change the file name of files downloaded from urls. Can't be used + with key downloads. Returns -------