From 364c69a92b045a6b0ed27fba6647116a9c585873 Mon Sep 17 00:00:00 2001 From: Maximilian Werk Date: Fri, 4 Jun 2021 12:41:15 +0200 Subject: [PATCH 1/2] feat: update example models for better running versions --- .github/pages/hello-world.md | 20 +++++++++++++++----- jina/helloworld/chatbot/executors.py | 2 +- jina/helloworld/multimodal/executors.py | 2 +- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/.github/pages/hello-world.md b/.github/pages/hello-world.md index 780d0123dbd3f..e5e99f3fa05fc 100644 --- a/.github/pages/hello-world.md +++ b/.github/pages/hello-world.md @@ -31,7 +31,9 @@ docker run -v "$(pwd)/j:/j" jinaai/jina hello fashion --workdir /j && open j/hel -This downloads the Fashion-MNIST training and test dataset and tells Jina to index 60,000 images from the training set. Then it randomly samples images from the test set as queries and asks Jina to retrieve relevant results. The whole process takes about 1 minute. +This downloads the Fashion-MNIST training and test dataset and tells Jina to index 60,000 images from the training set. +Then it randomly samples images from the test set as queries and asks Jina to retrieve relevant results. +The whole process takes about 1 minute. ## 🤖 Covid-19 Chatbot @@ -40,14 +42,17 @@ This downloads the Fashion-MNIST training and test dataset and tells Jina to ind -For NLP engineers, we provide a simple chatbot demo for answering Covid-19 questions. To run that: +For NLP engineers, we provide a simple chatbot demo for answering Covid-19 questions. +To run that: ```bash pip install --pre "jina[chatbot]" jina hello chatbot ``` -This downloads [CovidQA dataset](https://www.kaggle.com/xhlulu/covidqa) and tells Jina to index 418 question-answer pairs with DistilBERT. The index process takes about 1 minute on CPU. Then it opens a web page where you can input questions and ask Jina. 
+This downloads [CovidQA dataset](https://www.kaggle.com/xhlulu/covidqa) and tells Jina to index 418 question-answer pairs with MPNet. +The index process takes about 1 minute on CPU. +Then it opens a web page where you can input questions and ask Jina.



@@ -57,11 +62,16 @@ This downloads [CovidQA dataset](https://www.kaggle.com/xhlulu/covidqa) and tell -A multimodal-document contains multiple data types, e.g. a PDF document often contains figures and text. Jina lets you build a multimodal search solution in just minutes. To run our minimum multimodal document search demo: +A multimodal-document contains multiple data types, e.g. a PDF document often contains figures and text. +Jina lets you build a multimodal search solution in just minutes. +To run our minimum multimodal document search demo: ```bash pip install --pre "jina[multimodal]" jina hello multimodal ``` -This downloads [people image dataset](https://www.kaggle.com/ahmadahmadzada/images2000) and tells Jina to index 2,000 image-caption pairs with MobileNet and DistilBERT. The index process takes about 3 minute on CPU. Then it opens a web page where you can query multimodal documents. We have prepared [a YouTube tutorial](https://youtu.be/B_nH8GCmBfc) to walk you through this demo. +This downloads [people image dataset](https://www.kaggle.com/ahmadahmadzada/images2000) and tells Jina to index 2,000 image-caption pairs with MobileNet and MPNet. +The index process takes about 3 minutes on CPU. +Then it opens a web page where you can query multimodal documents. +We have prepared [a YouTube tutorial](https://youtu.be/B_nH8GCmBfc) to walk you through this demo. 
diff --git a/jina/helloworld/chatbot/executors.py b/jina/helloworld/chatbot/executors.py index 5faea2f8cc586..fce29727d8372 100644 --- a/jina/helloworld/chatbot/executors.py +++ b/jina/helloworld/chatbot/executors.py @@ -12,7 +12,7 @@ class MyTransformer(Executor): def __init__( self, - pretrained_model_name_or_path: str = 'sentence-transformers/distilbert-base-nli-stsb-mean-tokens', + pretrained_model_name_or_path: str = 'sentence-transformers/paraphrase-mpnet-base-v2', base_tokenizer_model: Optional[str] = None, pooling_strategy: str = 'mean', layer_index: int = -1, diff --git a/jina/helloworld/multimodal/executors.py b/jina/helloworld/multimodal/executors.py index eb3f07c9620ff..e4e3425acf5b5 100644 --- a/jina/helloworld/multimodal/executors.py +++ b/jina/helloworld/multimodal/executors.py @@ -27,7 +27,7 @@ class TextEncoder(Executor): def __init__( self, - pretrained_model_name_or_path: str = 'sentence-transformers/distilbert-base-nli-stsb-mean-tokens', + pretrained_model_name_or_path: str = 'sentence-transformers/paraphrase-mpnet-base-v2', base_tokenizer_model: Optional[str] = None, pooling_strategy: str = 'mean', layer_index: int = -1, From 5ed2d111d2cd598b97aa813ed6f87225d1f3f4a5 Mon Sep 17 00:00:00 2001 From: Maximilian Werk Date: Fri, 4 Jun 2021 13:03:36 +0200 Subject: [PATCH 2/2] test: fixed chatbot test --- tests/system/chatbot/test_chatbot.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/system/chatbot/test_chatbot.py b/tests/system/chatbot/test_chatbot.py index aff81b4a8fed0..5d42a7491f581 100644 --- a/tests/system/chatbot/test_chatbot.py +++ b/tests/system/chatbot/test_chatbot.py @@ -24,7 +24,7 @@ def post_uri(): @pytest.fixture def expected_result(): - return '''no evidence from the outbreak that eating garlic, sipping water every 15 minutes or taking vitamin C will protect people from the new coronavirus.''' + return '''It's not completely up to you.''' @pytest.fixture(autouse=True) @@ -43,4 +43,6 @@ def 
test_chatbot(payload, post_uri, expected_result): """Regression test for chatbot example.""" resp = requests.post(post_uri, json=payload) assert resp.status_code == 200 + print('-------------------------1111111----') + print(resp.text) assert expected_result in resp.text