{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Tune a CNN on MNIST\n", "\n", "This tutorial walks through using Ax to tune two hyperparameters (learning rate and momentum) for a PyTorch CNN on the MNIST dataset trained using SGD with momentum.\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "[INFO 04-30 12:37:08] ipy_plotting: Injecting Plotly library into cell. Do not overwrite or delete cell.\n" ] } ], "source": [ "import torch\n", "import numpy as np\n", "\n", "from ax.plot.contour import plot_contour\n", "from ax.plot.trace import optimization_trace_single_method\n", "from ax.service.managed_loop import optimize\n", "from ax.utils.notebook.plotting import render, init_notebook_plotting\n", "from ax.utils.tutorials.cnn_utils import load_mnist, train, evaluate\n", "\n", "init_notebook_plotting()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "dtype = torch.float\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Load MNIST data\n", "First, we need to load the MNIST data and partition it into training, validation, and test sets.\n", "\n", "Note: this will download the dataset if necessary." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ " 0%| | 0.00/9.91M [00:00" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "render(plot_contour(model=model, param_x='lr', param_y='momentum', metric_name='accuracy'))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 5. Plot best objective as function of the iteration\n", "\n", "Show the model accuracy improving as we identify better hyperparameters." 
] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# `optimization_trace_single_method` expects a 2-d array of means, because it expects to average means from multiple \n", "# optimization runs, so we wrap our best objectives array in another array.\n", "best_objectives = np.array([[trial.objective_mean*100 for trial in experiment.trials.values()]])\n", "best_objective_plot = optimization_trace_single_method(\n", " y=np.maximum.accumulate(best_objectives, axis=1),\n", " title=\"Model performance vs. # of iterations\",\n", " ylabel=\"Classification Accuracy, %\",\n", ")\n", "render(best_objective_plot)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 6. Train CNN with best hyperparameters and evaluate on test set\n", "Note that the resulting accuracy on the test set might not be exactly the same as the maximum accuracy achieved on the evaluation set throughout optimization. " ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Arm(name='17_0', parameters={'lr': 0.0029176399675537317, 'momentum': 3.0347402313065844e-16})" ] }, "execution_count": 11, "metadata": { "bento_obj_id": "139717742884176" }, "output_type": "execute_result" } ], "source": [ "data = experiment.fetch_data()\n", "df = data.df\n", "best_arm_name = df.arm_name[df['mean'] == df['mean'].max()].values[0]\n", "best_arm = experiment.arms_by_name[best_arm_name]\n", "best_arm" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": true }, "outputs": [], "source": [ "net = train(\n", " train_loader=train_loader, \n", " parameters=best_arm.parameters,\n", " dtype=dtype,\n", " device=device,\n", ")\n", "test_accuracy = evaluate(\n", " net=net,\n", " data_loader=test_loader,\n", " dtype=dtype,\n", " device=device,\n", ")" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Classification Accuracy (test set): 97.06%\n" ] } ], 
"source": [ "print(f\"Classification Accuracy (test set): {round(test_accuracy*100, 2)}%\")" ] } ], "metadata": { "kernelspec": { "display_name": "python3", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 }