python: add `props_cached` and `props` accessor to Object · NVIDIA/aistore@b1a8e29 · GitHub
[go: up one dir, main page]
More Web Proxy on the site http://driver.im/
Skip to content

Commit b1a8e29

Browse files
python: add props_cached and props accessor to Object
- props: ensures object properties are refreshed via HEAD request on every access
- props_cached: returns cached properties without triggering a network call
- Added tests to validate presence of 'Ais-Present' header in ObjectProps

Signed-off-by: Abhishek Gaikwad <gaikwadabhishek1997@gmail.com>
1 parent 5ff13fd commit b1a8e29

File tree

7 files changed

+131
-26
lines changed

7 files changed

+131
-26
lines changed

python/CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,10 @@ We structure this changelog in accordance with [Keep a Changelog](https://keepac
99
## Unreleased
1010

1111
### Added
12+
- `props_cached` returns cached properties without triggering a network call.
1213

1314
### Changed
15+
- `props` accessor ensures object properties are refreshed via HEAD request on every access.
1416

1517
### Removed
1618

python/aistore/pytorch/dynamic_sampler.py

Lines changed: 24 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,20 @@
66
so that there is a guarantee that each batch fits within memory
77
while attempting to fit the maximum number of samples in each batch.
88
9-
Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
9+
Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
1010
"""
1111

12-
import torch
12+
from logging import getLogger
1313
from typing import Iterator, List
14+
import torch
1415
from aistore.pytorch.base_map_dataset import AISBaseMapDataset
15-
from logging import getLogger
16-
1716

1817
# Default saturation of a batch needed to not be dropped with drop_last=True
1918
SATURATION_FACTOR = 0.8
2019
logger = getLogger(__name__)
2120

2221

22+
# pylint: disable=too-many-instance-attributes, too-many-arguments
2323
class DynamicBatchSampler(torch.utils.data.Sampler):
2424
"""
2525
@@ -32,11 +32,12 @@ class DynamicBatchSampler(torch.utils.data.Sampler):
3232
Args:
3333
data_source (AISBaseMapDataset): Base AIS map-style dataset to sample from to create dynamic mini-batches.
3434
max_batch_size (float): Maximum size of mini-batch in bytes.
35-
drop_last (bool, optional): If `True`, then will drop last batch if the batch is not at least 80% of `max_batch_size`.
36-
Defaults to `False`.
37-
allow_oversized_samples (bool, optional): If `True`, then any sample that is larger than the `max_batch_size` will be processed
38-
in its own mini-batch by itself instead of being dropped. Defaults to `False`.
39-
saturation_factor (float, optional): Saturation of a batch needed to not be dropped with `drop_last=True`. Default is `0.8`.
35+
drop_last (bool, optional): If `True`, then will drop last batch if the batch is not at least 80% of
36+
`max_batch_size`. Defaults to `False`.
37+
allow_oversized_samples (bool, optional): If `True`, then any sample that is larger than the `max_batch_size`
38+
will be processed in its own mini-batch by itself instead of being dropped. Defaults to `False`.
39+
saturation_factor (float, optional): Saturation of a batch needed to not be dropped with `drop_last=True`.
40+
Default is `0.8`.
4041
shuffle (bool, optional): Randomizes order of samples before calculating mini-batches. Default is `False`.
4142
"""
4243

@@ -55,10 +56,11 @@ def __init__(
5556
self._samples_list = data_source.get_obj_list()
5657
self._drop_last = drop_last
5758
self._allow_oversized_samples = allow_oversized_samples
58-
if not (0 <= saturation_factor <= 1):
59-
raise ValueError(f"`saturation_factor` must be between 0 and 1")
59+
if not 0 <= saturation_factor <= 1:
60+
raise ValueError("`saturation_factor` must be between 0 and 1")
6061
self._saturation_factor = saturation_factor
6162
self._shuffle = shuffle
63+
self._indices = None
6264

6365
def __iter__(self) -> Iterator[List[int]]:
6466
"""
@@ -78,31 +80,35 @@ def __iter__(self) -> Iterator[List[int]]:
7880
while index < len(self._samples_list):
7981
sample = self._samples_list[index]
8082

81-
if sample.props.size == 0:
83+
if sample.props_cached.size == 0:
8284
logger.warning(
83-
f"Sample {sample.name} cannot be processed as it has a size of 0 bytes"
85+
"Sample %s cannot be processed as it has a size of 0 bytes",
86+
sample.name,
8487
)
8588
index = self._get_next_index(index)
8689
continue
8790

88-
if sample.props.size > self._max_batch_size:
91+
if sample.props_cached.size > self._max_batch_size:
8992
if self._allow_oversized_samples is True:
9093
yield [index]
9194
else:
9295
logger.warning(
93-
f"Sample {sample.name} cannot be processed as it is larger than the max batch size: {sample.props.size} bytes > {self._max_batch_size} bytes"
96+
"Sample %s cannot be processed as it is larger than the max batch size: %d bytes > %d bytes",
97+
sample.name,
98+
sample.props_cached.size,
99+
self._max_batch_size,
94100
)
95101

96102
index = self._get_next_index(index)
97103
continue
98104

99-
if total_mem + sample.props.size < self._max_batch_size:
105+
if total_mem + sample.props_cached.size < self._max_batch_size:
100106
batch.append(index)
101107
index = self._get_next_index(index)
102-
total_mem += sample.props.size
108+
total_mem += sample.props_cached.size
103109
else:
104110

105-
if total_mem + sample.props.size == self._max_batch_size:
111+
if total_mem + sample.props_cached.size == self._max_batch_size:
106112
batch.append(index)
107113
index = self._get_next_index(index)
108114

python/aistore/sdk/obj/object.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,31 @@ def name(self) -> str:
108108

109109
@property
110110
def props(self) -> ObjectProps:
111-
"""Properties of this object."""
111+
"""
112+
Get the latest properties of the object.
113+
114+
This will make a HEAD request to the AIStore cluster to fetch up-to-date object headers
115+
and refresh the internal `_props` cache. Use this when you want to ensure you're accessing
116+
the most recent metadata for the object.
117+
118+
Returns:
119+
ObjectProps: The latest object properties from the server.
120+
"""
121+
self.head()
122+
return self._props
123+
124+
@property
125+
def props_cached(self) -> Optional[ObjectProps]:
126+
"""
127+
Get the cached object properties (without making a network call).
128+
129+
This is useful when:
130+
- You want to avoid a network request.
131+
- You're sure the cached `_props` was already set via a previous call to `head()` or during object construction.
132+
133+
Returns:
134+
ObjectProps or None: Cached object properties, or None if not set.
135+
"""
112136
return self._props
113137

114138
def head(self) -> CaseInsensitiveDict:

python/tests/integration/sdk/test_object_ops.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,45 @@ def test_get_blob_download(self, testcase):
233233
)
234234
self.assertTrue(len(jobs_list) > 0)
235235

236+
@unittest.skipUnless(REMOTE_SET, "Remote bucket is not set")
237+
def test_obj_present(self):
238+
"""
239+
Test the `Ais-Present` property of an object.
240+
This test ensures that the `present` property is correctly set for the object.
241+
"""
242+
# Create an object
243+
obj = self._create_object()
244+
obj.get_writer().put_content(b"test content")
245+
246+
# Verify the object is present
247+
self.assertEqual(
248+
True,
249+
obj.props.present,
250+
msg="The object should be present after putting content.",
251+
)
252+
253+
# Evict the object
254+
evict_job_id = self.bucket.objects(obj_names=[obj.name]).evict()
255+
self.client.job(job_id=evict_job_id).wait(timeout=TEST_TIMEOUT)
256+
257+
# Check the `Ais-Present` attribute after eviction
258+
# Note: `Ais-Present` should be "false" after eviction
259+
self.assertEqual(
260+
False,
261+
obj.props.present,
262+
msg="The object should not be present after eviction.",
263+
)
264+
265+
# Get the entire object
266+
obj.get_reader().read_all()
267+
268+
# Verify the object is present
269+
self.assertEqual(
270+
True,
271+
obj.props.present,
272+
msg="The object should be present after reading entire content.",
273+
)
274+
236275
@unittest.skipIf(
237276
"localhost" not in CLUSTER_ENDPOINT and "127.0.0.1" not in CLUSTER_ENDPOINT,
238277
"Cannot test promote without access to AIS cluster file storage",

python/tests/unit/pytorch/test_samplers.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ def setUp(self) -> None:
1919

2020
self.data = b"\0" * 1000 # 1kb
2121
mock_obj.get_reader.return_value.read_all.return_value = self.data
22-
mock_obj.props.size = len(self.data)
22+
mock_obj.props_cached.size = len(self.data)
2323
mock_obj.name = "test_obj"
2424

2525
self.mock_objects = [mock_obj for _ in range(10)] # 10 objects total
@@ -105,7 +105,7 @@ def test_dynamic_sampler_oversized_drop_last(self):
105105
mock_obj = Mock(Object)
106106
large_data = b"\0" * 6000 # 6kb
107107
mock_obj.get_reader.return_value.read_all.return_value = large_data
108-
mock_obj.props.size = len(large_data)
108+
mock_obj.props_cached.size = len(large_data)
109109
mock_obj.name = "test_obj"
110110

111111
self.mock_objects.append(mock_obj)

python/tests/unit/sdk/obj/test_object.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,8 @@ def test_properties(self):
7373
self.assertEqual("ais", self.object.bucket_provider)
7474
self.assertEqual(self.bck_qparams, self.object.query_params)
7575
self.assertEqual(OBJ_NAME, self.object.name)
76-
self.assertIsNone(self.object.props)
76+
self.assertIsNone(self.object.props_cached)
77+
self.assertIsInstance(self.object.props, ObjectProps)
7778

7879
def test_head(self):
7980
self.object.head()
@@ -410,7 +411,7 @@ def test_object_props(self):
410411

411412
self.mock_client.request.return_value = Mock(headers=headers)
412413

413-
self.assertEqual(self.object.props, None)
414+
self.assertEqual(self.object.props_cached, None)
414415

415416
self.object.head()
416417

python/tests/unit/sdk/test_bucket.py

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,15 @@
4141
STATUS_ACCEPTED,
4242
STATUS_BAD_REQUEST,
4343
STATUS_OK,
44+
AIS_BCK_NAME,
45+
AIS_OBJ_NAME,
46+
AIS_MIRROR_PATHS,
47+
AIS_PRESENT,
48+
AIS_BCK_PROVIDER,
49+
AIS_LOCATION,
50+
AIS_MIRROR_COPIES,
4451
)
52+
4553
from aistore.sdk.dataset.dataset_config import DatasetConfig
4654
from aistore.sdk.errors import (
4755
InvalidBckProvider,
@@ -545,14 +553,39 @@ def _transform_exec_assert(self, etl_name, expected_act_value, **kwargs):
545553

546554
def test_object(self):
547555
obj_name = "testobject"
548-
props = ObjectProps(CaseInsensitiveDict({"testkey": "testval"}))
556+
props_dict = CaseInsensitiveDict(
557+
{
558+
AIS_BCK_NAME: "test-bck-name",
559+
AIS_BCK_PROVIDER: "ais",
560+
AIS_OBJ_NAME: obj_name,
561+
AIS_LOCATION: "/sda/test-location",
562+
AIS_MIRROR_PATHS: "path1,path2",
563+
AIS_MIRROR_COPIES: "2",
564+
AIS_PRESENT: "true",
565+
}
566+
)
567+
props = ObjectProps(props_dict)
549568

550569
new_obj = self.ais_bck.object(obj_name=obj_name, props=props)
551-
552570
self.assertEqual(self.ais_bck.name, new_obj.bucket_name)
553571
self.assertEqual(self.ais_bck.provider, new_obj.bucket_provider)
554572
self.assertEqual(self.ais_bck.qparam, new_obj.query_params)
555-
self.assertEqual(props, new_obj.props)
573+
574+
self.assertEqual(props, new_obj.props_cached)
575+
576+
# Mock response with a headers attribute
577+
mock_response = Mock()
578+
mock_response.headers = props_dict
579+
580+
# Set mock return value for HEAD request
581+
self.mock_client.request.return_value = mock_response
582+
self.assertEqual(props.present, new_obj.props.present)
583+
self.assertEqual(props.access_time, new_obj.props.access_time)
584+
self.assertEqual(props.location, new_obj.props.location)
585+
self.assertEqual(props.bucket_name, new_obj.props.bucket_name)
586+
self.assertEqual(props.bucket_provider, new_obj.props.bucket_provider)
587+
self.assertEqual(props.mirror_copies, new_obj.props.mirror_copies)
588+
self.assertEqual(props.mirror_paths, new_obj.props.mirror_paths)
556589

557590
@patch("aistore.sdk.obj.object_writer.validate_file")
558591
@patch("aistore.sdk.bucket.validate_directory")

0 commit comments

Comments
 (0)
0