From ff1f5009bfc274d5ef79567fc59283f6a07228c0 Mon Sep 17 00:00:00 2001
From: Joan Fontanals Martinez
Date: Mon, 9 Aug 2021 17:36:30 +0200
Subject: [PATCH 1/2] test: refactor test

---
 tests/unit/types/arrays/test_neural_ops.py | 47 ++++++----------------
 1 file changed, 13 insertions(+), 34 deletions(-)

diff --git a/tests/unit/types/arrays/test_neural_ops.py b/tests/unit/types/arrays/test_neural_ops.py
index 591050a5c432d..a092476de81e1 100644
--- a/tests/unit/types/arrays/test_neural_ops.py
+++ b/tests/unit/types/arrays/test_neural_ops.py
@@ -13,7 +13,8 @@ from jina.math.dimensionality_reduction import PCA
 
 
-def get_docs():
+@pytest.fixture()
+def doc_lists():
     d1 = Document(embedding=np.array([0, 0, 0]))
     d2 = Document(embedding=np.array([3, 0, 0]))
     d3 = Document(embedding=np.array([1, 0, 0]))
@@ -28,36 +29,6 @@ def get_docs():
     return [d1, d2, d3, d4], [d1_m, d2_m, d3_m, d4_m, d5_m]
 
 
-def gen_docarrays(is_dam1=False, is_dam2=False, buffer_pool_size=1000):
-    def func():
-        D1, D2 = get_docs()
-        if is_dam1:
-            da1 = DocumentArrayMemmap(
-                tempfile.mkdtemp(), buffer_pool_size=buffer_pool_size
-            )
-        else:
-            da1 = DocumentArray()
-
-        if is_dam2:
-            da2 = DocumentArrayMemmap(
-                tempfile.mkdtemp(), buffer_pool_size=buffer_pool_size
-            )
-        else:
-            da2 = DocumentArray()
-        da1.extend(D1)
-        da2.extend(D2)
-        return da1, da2
-
-    return func
-
-
-def gen_docarray_combinations():
-    return [
-        gen_docarrays(is_dam1, is_dam2)
-        for is_dam1, is_dam2 in product((True, False), (True, False))
-    ] + [gen_docarrays(False, True, buffer_pool_size=3)]
-
-
 @pytest.fixture
 def docarrays_for_embedding_distance_computation():
     D1, D2 = get_docs()
@@ -89,12 +60,20 @@ def embeddings():
     return np.array([[1, 0, 0], [2, 0, 0], [3, 0, 0]])
 
 
-@pytest.mark.parametrize('gen_doc_arrays', gen_docarray_combinations())
+@pytest.mark.parametrize('first_memmap', [True, False])
+@pytest.mark.parametrize('second_memmap', [True, False])
 @pytest.mark.parametrize(
     'limit, batch_size', [(1, None), (2, None), (None, None), (1, 1), (1, 2), (2, 1)]
 )
-def test_matching_retrieves_correct_number(gen_doc_arrays, limit, batch_size):
-    D1, D2 = gen_doc_arrays()
+def test_matching_retrieves_correct_number(
+    doc_lists, limit, batch_size, first_memmap, second_memmap, tmpdir
+):
+    doc_list1, doc_list2 = doc_lists
+
+    D1 = DocumentArray() if not first_memmap else DocumentArrayMemmap(tmpdir)
+    D1.extend(doc_list1)
+    D2 = DocumentArray() if not first_memmap else DocumentArrayMemmap(tmpdir)
+    D2.extend(doc_list2)
     D1.match(D2, metric='sqeuclidean', limit=limit, batch_size=batch_size)
     for m in D1.get_attributes('matches'):
         if limit is None:

From cf02424cf11117c1ea3ef0900c3184d3f7f64e8e Mon Sep 17 00:00:00 2001
From: Alaeddine Abdessalem
Date: Tue, 10 Aug 2021 08:15:46 +0100
Subject: [PATCH 2/2] test: refactor da/dam neural ops tests

---
 tests/unit/types/arrays/test_neural_ops.py | 81 +++++++++++++++++-----
 1 file changed, 64 insertions(+), 17 deletions(-)

diff --git a/tests/unit/types/arrays/test_neural_ops.py b/tests/unit/types/arrays/test_neural_ops.py
index a092476de81e1..8f987722379fa 100644
--- a/tests/unit/types/arrays/test_neural_ops.py
+++ b/tests/unit/types/arrays/test_neural_ops.py
@@ -1,7 +1,5 @@
 import copy
 import os
-import tempfile
-from itertools import product
 
 import numpy as np
 import scipy.sparse as sp
@@ -30,8 +28,8 @@ def doc_lists():
 
 
 @pytest.fixture
-def docarrays_for_embedding_distance_computation():
-    D1, D2 = get_docs()
+def docarrays_for_embedding_distance_computation(doc_lists):
+    D1, D2 = doc_lists
     da1 = DocumentArray(D1)
     da2 = DocumentArray(D2)
     return da1, da2
@@ -60,20 +58,44 @@ def embeddings():
     return np.array([[1, 0, 0], [2, 0, 0], [3, 0, 0]])
 
 
+def doc_lists_to_doc_arrays(
+    doc_lists, tmpdir, first_memmap, second_memmap, buffer_pool_size
+):
+    doc_list1, doc_list2 = doc_lists
+
+    tmpdir1, tmpdir2 = tmpdir / '1', tmpdir / '2'
+
+    D1 = (
+        DocumentArray()
+        if not first_memmap
+        else DocumentArrayMemmap(tmpdir1, buffer_pool_size=buffer_pool_size)
+    )
+    D1.extend(doc_list1)
+    D2 = (
+        DocumentArray()
+        if not second_memmap
+        else DocumentArrayMemmap(tmpdir2, buffer_pool_size=buffer_pool_size)
+    )
+    D2.extend(doc_list2)
+    return D1, D2
+
+
+@pytest.mark.parametrize('buffer_pool_size', [1000, 3])
 @pytest.mark.parametrize('first_memmap', [True, False])
 @pytest.mark.parametrize('second_memmap', [True, False])
 @pytest.mark.parametrize(
     'limit, batch_size', [(1, None), (2, None), (None, None), (1, 1), (1, 2), (2, 1)]
 )
 def test_matching_retrieves_correct_number(
-    doc_lists, limit, batch_size, first_memmap, second_memmap, tmpdir
+    doc_lists, limit, batch_size, first_memmap, second_memmap, tmpdir, buffer_pool_size
 ):
-    doc_list1, doc_list2 = doc_lists
-
-    D1 = DocumentArray() if not first_memmap else DocumentArrayMemmap(tmpdir)
-    D1.extend(doc_list1)
-    D2 = DocumentArray() if not first_memmap else DocumentArrayMemmap(tmpdir)
-    D2.extend(doc_list2)
+    D1, D2 = doc_lists_to_doc_arrays(
+        doc_lists,
+        tmpdir,
+        first_memmap,
+        second_memmap,
+        buffer_pool_size=buffer_pool_size,
+    )
     D1.match(D2, metric='sqeuclidean', limit=limit, batch_size=batch_size)
     for m in D1.get_attributes('matches'):
         if limit is None:
@@ -165,6 +187,9 @@ def scipy_cdist_metric(X, Y, *args):
     np.testing.assert_equal(distances, distances_scipy)
 
 
+@pytest.mark.parametrize('buffer_pool_size', [1000, 3])
+@pytest.mark.parametrize('first_memmap', [True, False])
+@pytest.mark.parametrize('second_memmap', [True, False])
 @pytest.mark.parametrize(
     'normalization, metric',
     [
@@ -177,14 +202,26 @@ def scipy_cdist_metric(X, Y, *args):
     ],
 )
 @pytest.mark.parametrize('use_scipy', [True, False])
-@pytest.mark.parametrize('gen_doc_arrays', gen_docarray_combinations())
 def test_matching_retrieves_closest_matches(
-    gen_doc_arrays, normalization, metric, use_scipy
+    doc_lists,
+    tmpdir,
+    normalization,
+    metric,
+    use_scipy,
+    first_memmap,
+    second_memmap,
+    buffer_pool_size,
 ):
     """
     Tests if match.values are returned 'low to high' if normalization is True or 'high to low' otherwise
     """
-    D1, D2 = gen_doc_arrays()
+    D1, D2 = doc_lists_to_doc_arrays(
+        doc_lists,
+        tmpdir,
+        first_memmap,
+        second_memmap,
+        buffer_pool_size=buffer_pool_size,
+    )
     D1.match(
         D2, metric=metric, limit=3, normalization=normalization, use_scipy=use_scipy
     )
@@ -263,12 +300,22 @@ def test_scipy_dist(
     np.testing.assert_equal(values_docarray, values_docarraymemmap)
 
 
-@pytest.mark.parametrize('gen_doc_arrays', gen_docarray_combinations())
-def test_2arity_function(gen_doc_arrays):
+@pytest.mark.parametrize('buffer_pool_size', [1000, 3])
+@pytest.mark.parametrize('first_memmap', [True, False])
+@pytest.mark.parametrize('second_memmap', [True, False])
+def test_2arity_function(
+    first_memmap, second_memmap, doc_lists, tmpdir, buffer_pool_size
+):
     def dotp(x, y, *args):
         return np.dot(x, np.transpose(y))
 
-    D1, D2 = gen_doc_arrays()
+    D1, D2 = doc_lists_to_doc_arrays(
+        doc_lists,
+        tmpdir,
+        first_memmap,
+        second_memmap,
+        buffer_pool_size=buffer_pool_size,
+    )
     D1.match(D2, metric=dotp, use_scipy=True)
     for d in D1: