From cc9f3609f5d763acd7e1942ca9377f889882e720 Mon Sep 17 00:00:00 2001 From: Jaco Date: Sun, 8 Aug 2021 22:04:45 +1000 Subject: [PATCH] start production tutes --- docs_src/sidebar.json | 5 + nbs/100_tutorial.inference.ipynb | 343 +++++++++++++++++++++++++++++++ 2 files changed, 348 insertions(+) create mode 100644 nbs/100_tutorial.inference.ipynb diff --git a/docs_src/sidebar.json b/docs_src/sidebar.json index 9f79f4bd33..04ebb23b4d 100644 --- a/docs_src/sidebar.json +++ b/docs_src/sidebar.json @@ -37,6 +37,11 @@ "Lightning": "/migrating_lightning.html", "Catalyst": "/migrating_catalyst.html" } + }, + "empty4": { + "Production": { + "Inference": "/tutorial.inference.html" + } } }, "Training": { diff --git a/nbs/100_tutorial.inference.ipynb b/nbs/100_tutorial.inference.ipynb new file mode 100644 index 0000000000..cba25a2789 --- /dev/null +++ b/nbs/100_tutorial.inference.ipynb @@ -0,0 +1,343 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "#hide\n", + "#skip\n", + "! [ -e /content ] && pip install -Uqq fastai # upgrade fastai on colab" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference\n", + "\n", + "> How to perform inference with a tabular example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We start by replicating the training from the [Tabular tutorial](https://docs.fast.ai/tutorial.tabular.html). Most of the details are skipped here, so see that tutorial for more depth. We can then export the trained model for inference." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from fastai.tabular.all import *" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can download a sample of this dataset with the usual `untar_data` command:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(#5) [Path('/Users/jdp/.fastai/data/adult_sample/adult.csv'),Path('/Users/jdp/.fastai/data/adult_sample/export.pkl'),Path('/Users/jdp/.fastai/data/adult_sample/models'),Path('/Users/jdp/.fastai/data/adult_sample/tabular_trained_v_0_1'),Path('/Users/jdp/.fastai/data/adult_sample/tabular_learner_trained_v_0_1')]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "path = untar_data(URLs.ADULT_SAMPLE)\n", + "path.ls()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(32561, 15)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv(path/'adult.csv')\n", + "df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "dls = TabularDataLoaders.from_csv(path/'adult.csv', path=path, y_names=\"salary\",\n", + " cat_names = ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race'],\n", + " cont_names = ['age', 'fnlwgt', 'education-num'],\n", + " procs = [Categorify, FillMissing, Normalize])" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "learn = tabular_learner(dls, metrics=accuracy)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
epochtrain_lossvalid_lossaccuracytime
00.3602470.3563190.83599500:04
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "learn.fit_one_cycle(1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we have a trained model we can export it to use for inference:\n", + "\n", + "This saves a copy of the learner to the learner's data path `learn.path`, using Python's `pickle` module. Note that it's a good idea to keep a log of your model describing the steps you used to arrive at it. This could include a copy of the training notebook or a good description in a notebook." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(#5) [Path('/Users/jdp/.fastai/data/adult_sample/adult.csv'),Path('/Users/jdp/.fastai/data/adult_sample/export.pkl'),Path('/Users/jdp/.fastai/data/adult_sample/models'),Path('/Users/jdp/.fastai/data/adult_sample/tabular_trained_v_0_1'),Path('/Users/jdp/.fastai/data/adult_sample/tabular_learner_trained_v_0_1')]" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "learn.export(\"tabular_learner_trained_v_0_1\")\n", + "learn_path = learn.path\n", + "learn_path.ls()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "----" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## In production\n", + "\n", + "> We copy/upload our trained learner to our production environment. \n", + "\n", + "Once there we can load it and start making predictions on unseen data." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's delete our learner object before loading to simulate the production environment.\n", + "\n", + "> Warning: `load_learner` requires all your custom code be in the exact same place as when exporting your `Learner` (the main script, or the module you imported it from)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "del learn\n", + "\n", + "learn = load_learner(learn_path/\"tabular_learner_trained_v_0_1\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The learner loads with an empty data loader, so to get predictions we need to load data into a test data loader.\n", + "\n", + "You can use the `test_dl` method of the `DataLoaders`. The dataframe you pass to it does not need to contain the dependent variable column." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(learn.dls.train_ds)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In production you would receive data for inference from an external source, for instance via an HTTP request. \n", + "You will want to check this data before putting it through your model.\n", + "Checks on the data could include:\n", + "\n", + "- Missing data.\n", + "- Unknown columns.\n", + "- Data that lies far outside the scope of your training data.\n", + "\n", + "Here we use a copy of the dataframe from training as an example." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "test_df = df.copy()\n", + "test_df.drop(['salary'], axis=1, inplace=True)\n", + "dl = learn.dls.test_dl(test_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then `Learner.get_preds` will give you the predictions:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "(tensor([[0.6570, 0.3430],\n", + " [0.4480, 0.5520],\n", + " [0.7964, 0.2036],\n", + " ...,\n", + " [0.6095, 0.3905],\n", + " [0.8448, 0.1552],\n", + " [0.8727, 0.1273]]),\n", + " None)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "learn.get_preds(dl=dl)" + ] + } + ], + "metadata": { + "jupytext": { + "split_at_heading": true + }, + "kernelspec": { + "display_name": "fastai", + "language": "python", + "name": "fastai" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.5" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +}