jina-ai · hanxiao · Aug 10, 2021 · Aug 9, 2021 · Aug 10, 2021
diff --git a/tests/unit/types/arrays/test_neural_ops.py b/tests/unit/types/arrays/test_neural_ops.py
@@ -1,7 +1,5 @@
 import copy
 import os
-import tempfile
-from itertools import product
 
 import numpy as np
 import scipy.sparse as sp
@@ -13,7 +11,8 @@
 from jina.math.dimensionality_reduction import PCA
 
 
-def get_docs():
+@pytest.fixture()
+def doc_lists():
     d1 = Document(embedding=np.array([0, 0, 0]))
     d2 = Document(embedding=np.array([3, 0, 0]))
     d3 = Document(embedding=np.array([1, 0, 0]))
@@ -28,39 +27,9 @@ def get_docs():
     return [d1, d2, d3, d4], [d1_m, d2_m, d3_m, d4_m, d5_m]
 
 
-def gen_docarrays(is_dam1=False, is_dam2=False, buffer_pool_size=1000):
-    def func():
-        D1, D2 = get_docs()
-        if is_dam1:
-            da1 = DocumentArrayMemmap(
-                tempfile.mkdtemp(), buffer_pool_size=buffer_pool_size
-            )
-        else:
-            da1 = DocumentArray()
-
-        if is_dam2:
-            da2 = DocumentArrayMemmap(
-                tempfile.mkdtemp(), buffer_pool_size=buffer_pool_size
-            )
-        else:
-            da2 = DocumentArray()
-        da1.extend(D1)
-        da2.extend(D2)
-        return da1, da2
-
-    return func
-
-
-def gen_docarray_combinations():
-    return [
-        gen_docarrays(is_dam1, is_dam2)
-        for is_dam1, is_dam2 in product((True, False), (True, False))
-    ] + [gen_docarrays(False, True, buffer_pool_size=3)]
-
-
 @pytest.fixture
-def docarrays_for_embedding_distance_computation():
-    D1, D2 = get_docs()
+def docarrays_for_embedding_distance_computation(doc_lists):
+    D1, D2 = doc_lists
     da1 = DocumentArray(D1)
     da2 = DocumentArray(D2)
     return da1, da2
@@ -89,12 +58,44 @@ def embeddings():
     return np.array([[1, 0, 0], [2, 0, 0], [3, 0, 0]])
 
 
 @pytest.mark.parametrize('gen_doc_arrays', gen_docarray_combinations())
+def doc_lists_to_doc_arrays(
+    doc_lists, tmpdir, first_memmap, second_memmap, buffer_pool_size
+):
+    doc_list1, doc_list2 = doc_lists
+
+    tmpdir1, tmpdir2 = tmpdir / '1', tmpdir / '2'
+
+    D1 = (
+        DocumentArray()
+        if not first_memmap
+        else DocumentArrayMemmap(tmpdir1, buffer_pool_size=buffer_pool_size)
+    )
+    D1.extend(doc_list1)
+    D2 = (
+        DocumentArray()
+        if not second_memmap
+        else DocumentArrayMemmap(tmpdir2, buffer_pool_size=buffer_pool_size)
+    )
+    D2.extend(doc_list2)
+    return D1, D2
+
+
+@pytest.mark.parametrize('buffer_pool_size', [1000, 3])
+@pytest.mark.parametrize('first_memmap', [True, False])
+@pytest.mark.parametrize('second_memmap', [True, False])
 @pytest.mark.parametrize(
     'limit, batch_size', [(1, None), (2, None), (None, None), (1, 1), (1, 2), (2, 1)]
 )
-def test_matching_retrieves_correct_number(gen_doc_arrays, limit, batch_size):
-    D1, D2 = gen_doc_arrays()
+def test_matching_retrieves_correct_number(
+    doc_lists, limit, batch_size, first_memmap, second_memmap, tmpdir, buffer_pool_size
+):
+    D1, D2 = doc_lists_to_doc_arrays(
+        doc_lists,
+        tmpdir,
+        first_memmap,
+        second_memmap,
+        buffer_pool_size=buffer_pool_size,
+    )
     D1.match(D2, metric='sqeuclidean', limit=limit, batch_size=batch_size)
     for m in D1.get_attributes('matches'):
         if limit is None:
@@ -186,6 +187,9 @@ def scipy_cdist_metric(X, Y, *args):
     np.testing.assert_equal(distances, distances_scipy)
 
 
+@pytest.mark.parametrize('buffer_pool_size', [1000, 3])
+@pytest.mark.parametrize('first_memmap', [True, False])
+@pytest.mark.parametrize('second_memmap', [True, False])
 @pytest.mark.parametrize(
     'normalization, metric',
     [
@@ -198,14 +202,26 @@ def scipy_cdist_metric(X, Y, *args):
     ],
 )
 @pytest.mark.parametrize('use_scipy', [True, False])
-@pytest.mark.parametrize('gen_doc_arrays', gen_docarray_combinations())
 def test_matching_retrieves_closest_matches(
-    gen_doc_arrays, normalization, metric, use_scipy
+    doc_lists,
+    tmpdir,
+    normalization,
+    metric,
+    use_scipy,
+    first_memmap,
+    second_memmap,
+    buffer_pool_size,
 ):
     """
     Tests if match.values are returned 'low to high' if normalization is True or 'high to low' otherwise
     """
-    D1, D2 = gen_doc_arrays()
+    D1, D2 = doc_lists_to_doc_arrays(
+        doc_lists,
+        tmpdir,
+        first_memmap,
+        second_memmap,
+        buffer_pool_size=buffer_pool_size,
+    )
     D1.match(
         D2, metric=metric, limit=3, normalization=normalization, use_scipy=use_scipy
     )
@@ -284,12 +300,22 @@ def test_scipy_dist(
     np.testing.assert_equal(values_docarray, values_docarraymemmap)
 
 
-@pytest.mark.parametrize('gen_doc_arrays', gen_docarray_combinations())
-def test_2arity_function(gen_doc_arrays):
+@pytest.mark.parametrize('buffer_pool_size', [1000, 3])
+@pytest.mark.parametrize('first_memmap', [True, False])
+@pytest.mark.parametrize('second_memmap', [True, False])
+def test_2arity_function(
+    first_memmap, second_memmap, doc_lists, tmpdir, buffer_pool_size
+):
     def dotp(x, y, *args):
         return np.dot(x, np.transpose(y))
 
-    D1, D2 = gen_doc_arrays()
+    D1, D2 = doc_lists_to_doc_arrays(
+        doc_lists,
+        tmpdir,
+        first_memmap,
+        second_memmap,
+        buffer_pool_size=buffer_pool_size,
+    )
     D1.match(D2, metric=dotp, use_scipy=True)
 
     for d in D1: