{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "In this lab, you’ll explore the breast cancer dataset and train a model to predict whether or not a person has breast cancer. We will start off with a weak learner: a decision tree with maximum depth = 2.\n", "\n", "We will then build AdaBoost ensembles with up to 50 trees, increasing the number of trees in steps of 3, and compare their performance with the weak learner.\n", "\n", "Let's get started by loading the libraries." ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np \n", "import pandas as pd \n", "import sklearn\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "# train_test_split lives in sklearn.model_selection; the old\n", "# sklearn.cross_validation module was removed in scikit-learn 0.20.\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.model_selection import KFold\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.model_selection import cross_val_score\n", "from sklearn.preprocessing import LabelEncoder\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.ensemble import AdaBoostClassifier\n", "from sklearn.ensemble import GradientBoostingClassifier\n", "from sklearn.datasets import load_breast_cancer\n", "from sklearn.datasets import load_digits\n", "from sklearn import metrics\n", "%matplotlib inline\n", "\n", "import os\n", "import warnings\n", "warnings.filterwarnings('ignore')\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will use scikit-learn's breast cancer dataset, in which the target variable is 0 if the tumor is malignant and 1 if it is benign. Let's load the data." 
] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Fetch the two bundled scikit-learn datasets.\n", "digits = load_digits()\n", "cancer = load_breast_cancer()\n", "\n", "# `data` is an alias for the breast cancer bundle.\n", "data = cancer" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Put the feature matrix and the label vector side by side, name the columns,\n", "# then store the label column as an unsigned 16-bit integer.\n", "df = pd.DataFrame(\n", "    np.column_stack([data['data'], data['target']]),\n", "    columns=[*data['feature_names'], 'target'],\n", ").astype({'target': 'uint16'})" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimensiontarget
017.99010.38122.801001.00.118400.277600.3001000.1471000.24190.07871...17.33184.602019.00.162200.665600.711900.265400.46010.118900
120.57017.77132.901326.00.084740.078640.0869000.0701700.18120.05667...23.41158.801956.00.123800.186600.241600.186000.27500.089020
219.69021.25130.001203.00.109600.159900.1974000.1279000.20690.05999...25.53152.501709.00.144400.424500.450400.243000.36130.087580
311.42020.3877.58386.10.142500.283900.2414000.1052000.25970.09744...26.5098.87567.70.209800.866300.686900.257500.66380.173000
420.29014.34135.101297.00.100300.132800.1980000.1043000.18090.05883...16.67152.201575.00.137400.205000.400000.162500.23640.076780
512.45015.7082.57477.10.127800.170000.1578000.0808900.20870.07613...23.75103.40741.60.179100.524900.535500.174100.39850.124400
618.25019.98119.601040.00.094630.109000.1127000.0740000.17940.05742...27.66153.201606.00.144200.257600.378400.193200.30630.083680
713.71020.8390.20577.90.118900.164500.0936600.0598500.21960.07451...28.14110.60897.00.165400.368200.267800.155600.31960.115100
813.00021.8287.50519.80.127300.193200.1859000.0935300.23500.07389...30.73106.20739.30.170300.540100.539000.206000.43780.107200
912.46024.0483.97475.90.118600.239600.2273000.0854300.20300.08243...40.6897.65711.40.185301.058001.105000.221000.43660.207500
1016.02023.24102.70797.80.082060.066690.0329900.0332300.15280.05697...33.88123.801150.00.118100.155100.145900.099750.29480.084520
1115.78017.89103.60781.00.097100.129200.0995400.0660600.18420.06082...27.28136.501299.00.139600.560900.396500.181000.37920.104800
1219.17024.80132.401123.00.097400.245800.2065000.1118000.23970.07800...29.94151.701332.00.103700.390300.363900.176700.31760.102300
1315.85023.95103.70782.70.084010.100200.0993800.0536400.18470.05338...27.66112.00876.50.113100.192400.232200.111900.28090.062870
1413.73022.6193.60578.30.113100.229300.2128000.0802500.20690.07682...32.01108.80697.70.165100.772500.694300.220800.35960.143100
1514.54027.5496.73658.80.113900.159500.1639000.0736400.23030.07077...37.13124.10943.20.167800.657700.702600.171200.42180.134100
1614.68020.1394.74684.50.098670.072000.0739500.0525900.15860.05922...30.88123.401138.00.146400.187100.291400.160900.30290.082160
1716.13020.68108.10798.80.117000.202200.1722000.1028000.21640.07356...31.48136.801315.00.178900.423300.478400.207300.37060.114200
1819.81022.15130.001260.00.098310.102700.1479000.0949800.15820.05395...30.88186.802398.00.151200.315000.537200.238800.27680.076150
1913.54014.3687.46566.30.097790.081290.0666400.0478100.18850.05766...19.2699.70711.20.144000.177300.239000.128800.29770.072591
2013.08015.7185.63520.00.107500.127000.0456800.0311000.19670.06811...20.4996.09630.50.131200.277600.189000.072830.31840.081831
219.50412.4460.34273.90.102400.064920.0295600.0207600.18150.06905...15.6665.13314.90.132400.114800.088670.062270.24500.077731
2215.34014.26102.50704.40.107300.213500.2077000.0975600.25210.07032...19.08125.10980.90.139000.595400.630500.239300.46670.099460
2321.16023.04137.201404.00.094280.102200.1097000.0863200.17690.05278...35.59188.002615.00.140100.260000.315500.200900.28220.075260
2416.65021.38110.00904.60.112100.145700.1525000.0917000.19950.06330...31.56177.002215.00.180500.357800.469500.209500.36130.095640
2517.14016.40116.00912.70.118600.227600.2229000.1401000.30400.07413...21.40152.401461.00.154500.394900.385300.255000.40660.105900
2614.58021.5397.41644.80.105400.186800.1425000.0878300.22520.06924...33.21122.40896.90.152500.664300.553900.270100.42640.127500
2718.61020.25122.101094.00.094400.106600.1490000.0773100.16970.05699...27.26139.901403.00.133800.211700.344600.149000.23410.074210
2815.30025.27102.40732.40.108200.169700.1683000.0875100.19260.06540...36.71149.301269.00.164100.611000.633500.202400.40270.098760
2917.57015.05115.00955.10.098470.115700.0987500.0795300.17390.06149...19.52134.901227.00.125500.281200.248900.145600.27560.079190
..................................................................
5397.69125.4448.34170.40.086680.119900.0925200.0136400.20370.07751...31.8954.49223.60.159600.306400.339300.050000.27900.106601
54011.54014.4474.65402.90.099840.112000.0673700.0259400.18180.06782...19.6878.78457.80.134500.211800.179700.069180.23290.081341
54114.47024.9995.81656.40.088370.123000.1009000.0389000.18720.06341...31.73113.50808.90.134000.420200.404000.120500.31870.102301
54214.74025.4294.70668.60.082750.072140.0410500.0302700.18400.05680...32.29107.40826.40.106000.137600.161100.109500.27220.069561
54313.21028.0684.88538.40.086710.068770.0298700.0327500.16280.05781...37.1792.48629.60.107200.138100.106200.079580.24730.064431
54413.87020.7089.77584.80.095780.101800.0368800.0236900.16200.06688...24.7599.17688.60.126400.203700.137700.068450.22490.084921
54513.62023.2387.19573.20.092460.067470.0297400.0244300.16640.05801...29.0997.58729.80.121600.151700.104900.071740.26420.069531
54610.32016.3565.31324.90.094340.049940.0101200.0054950.18850.06201...21.7771.12384.90.128500.088420.043840.023810.26810.073991
54710.26016.5865.85320.80.088770.080660.0435800.0243800.16690.06714...22.0471.08357.40.146100.224600.178300.083330.26910.094791
5489.68319.3461.05285.70.084910.050300.0233700.0096150.15800.06235...25.5969.10364.20.119900.095460.093500.038460.25520.079201
54910.82024.2168.89361.60.081920.066020.0154800.0081600.19760.06328...31.4583.90505.60.120400.163300.061940.032640.30590.076261
55010.86021.4868.51360.50.074310.042270.0000000.0000000.16610.05948...24.7774.08412.30.100100.073480.000000.000000.24580.065921
55111.13022.4471.49378.40.095660.081940.0482400.0225700.20300.06552...28.2677.80436.60.108700.178200.156400.064130.31690.080321
55212.77029.4381.35507.90.082760.042340.0199700.0149900.15390.05637...36.0088.10594.70.123400.106400.086530.064980.24070.064841
5539.33321.9459.01264.00.092400.056050.0399600.0128200.16920.06576...25.0562.86295.80.110300.082980.079930.025640.24350.073931
55412.88028.9282.50514.30.081230.058240.0619500.0234300.15660.05708...35.7488.84595.70.122700.162000.243900.064930.23720.072421
55510.29027.6165.67321.40.090300.076580.0599900.0273800.15930.06127...34.9169.57357.60.138400.171000.200000.091270.22260.082831
55610.16019.5964.73311.70.100300.075040.0050250.0111600.17910.06331...22.8867.88347.30.126500.120000.010050.022320.22620.067421
5579.42327.8859.26271.30.081230.049710.0000000.0000000.17420.06059...34.2466.50330.60.107300.071580.000000.000000.24750.069691
55814.59022.6896.39657.10.084730.133000.1029000.0373600.14540.06147...27.27105.90733.50.102600.317100.366200.110500.22580.080041
55911.51023.9374.52403.50.092610.102100.1112000.0410500.13880.06570...37.1682.28474.20.129800.251700.363000.096530.21120.087321
56014.05027.1591.38600.40.099290.112600.0446200.0430400.15370.06171...33.17100.20706.70.124100.226400.132600.104800.22500.083211
56111.20029.3770.67386.00.074490.035580.0000000.0000000.10600.05502...38.3075.19439.60.092670.054940.000000.000000.15660.059051
56215.22030.62103.40716.90.104800.208700.2550000.0942900.21280.07152...42.79128.70915.00.141700.791701.170000.235600.40890.140900
56320.92025.09143.001347.00.109900.223600.3174000.1474000.21490.06879...29.41179.101819.00.140700.418600.659900.254200.29290.098730
56421.56022.39142.001479.00.111000.115900.2439000.1389000.17260.05623...26.40166.102027.00.141000.211300.410700.221600.20600.071150
56520.13028.25131.201261.00.097800.103400.1440000.0979100.17520.05533...38.25155.001731.00.116600.192200.321500.162800.25720.066370
56616.60028.08108.30858.10.084550.102300.0925100.0530200.15900.05648...34.12126.701124.00.113900.309400.340300.141800.22180.078200
56720.60029.33140.101265.00.117800.277000.3514000.1520000.23970.07016...39.42184.601821.00.165000.868100.938700.265000.40870.124000
5687.76024.5447.92181.00.052630.043620.0000000.0000000.15870.05884...30.3759.16268.60.089960.064440.000000.000000.28710.070391
\n", "

569 rows × 31 columns

\n", "
" ], "text/plain": [ " mean radius mean texture mean perimeter mean area mean smoothness \\\n", "0 17.990 10.38 122.80 1001.0 0.11840 \n", "1 20.570 17.77 132.90 1326.0 0.08474 \n", "2 19.690 21.25 130.00 1203.0 0.10960 \n", "3 11.420 20.38 77.58 386.1 0.14250 \n", "4 20.290 14.34 135.10 1297.0 0.10030 \n", "5 12.450 15.70 82.57 477.1 0.12780 \n", "6 18.250 19.98 119.60 1040.0 0.09463 \n", "7 13.710 20.83 90.20 577.9 0.11890 \n", "8 13.000 21.82 87.50 519.8 0.12730 \n", "9 12.460 24.04 83.97 475.9 0.11860 \n", "10 16.020 23.24 102.70 797.8 0.08206 \n", "11 15.780 17.89 103.60 781.0 0.09710 \n", "12 19.170 24.80 132.40 1123.0 0.09740 \n", "13 15.850 23.95 103.70 782.7 0.08401 \n", "14 13.730 22.61 93.60 578.3 0.11310 \n", "15 14.540 27.54 96.73 658.8 0.11390 \n", "16 14.680 20.13 94.74 684.5 0.09867 \n", "17 16.130 20.68 108.10 798.8 0.11700 \n", "18 19.810 22.15 130.00 1260.0 0.09831 \n", "19 13.540 14.36 87.46 566.3 0.09779 \n", "20 13.080 15.71 85.63 520.0 0.10750 \n", "21 9.504 12.44 60.34 273.9 0.10240 \n", "22 15.340 14.26 102.50 704.4 0.10730 \n", "23 21.160 23.04 137.20 1404.0 0.09428 \n", "24 16.650 21.38 110.00 904.6 0.11210 \n", "25 17.140 16.40 116.00 912.7 0.11860 \n", "26 14.580 21.53 97.41 644.8 0.10540 \n", "27 18.610 20.25 122.10 1094.0 0.09440 \n", "28 15.300 25.27 102.40 732.4 0.10820 \n", "29 17.570 15.05 115.00 955.1 0.09847 \n", ".. ... ... ... ... ... 
\n", "539 7.691 25.44 48.34 170.4 0.08668 \n", "540 11.540 14.44 74.65 402.9 0.09984 \n", "541 14.470 24.99 95.81 656.4 0.08837 \n", "542 14.740 25.42 94.70 668.6 0.08275 \n", "543 13.210 28.06 84.88 538.4 0.08671 \n", "544 13.870 20.70 89.77 584.8 0.09578 \n", "545 13.620 23.23 87.19 573.2 0.09246 \n", "546 10.320 16.35 65.31 324.9 0.09434 \n", "547 10.260 16.58 65.85 320.8 0.08877 \n", "548 9.683 19.34 61.05 285.7 0.08491 \n", "549 10.820 24.21 68.89 361.6 0.08192 \n", "550 10.860 21.48 68.51 360.5 0.07431 \n", "551 11.130 22.44 71.49 378.4 0.09566 \n", "552 12.770 29.43 81.35 507.9 0.08276 \n", "553 9.333 21.94 59.01 264.0 0.09240 \n", "554 12.880 28.92 82.50 514.3 0.08123 \n", "555 10.290 27.61 65.67 321.4 0.09030 \n", "556 10.160 19.59 64.73 311.7 0.10030 \n", "557 9.423 27.88 59.26 271.3 0.08123 \n", "558 14.590 22.68 96.39 657.1 0.08473 \n", "559 11.510 23.93 74.52 403.5 0.09261 \n", "560 14.050 27.15 91.38 600.4 0.09929 \n", "561 11.200 29.37 70.67 386.0 0.07449 \n", "562 15.220 30.62 103.40 716.9 0.10480 \n", "563 20.920 25.09 143.00 1347.0 0.10990 \n", "564 21.560 22.39 142.00 1479.0 0.11100 \n", "565 20.130 28.25 131.20 1261.0 0.09780 \n", "566 16.600 28.08 108.30 858.1 0.08455 \n", "567 20.600 29.33 140.10 1265.0 0.11780 \n", "568 7.760 24.54 47.92 181.0 0.05263 \n", "\n", " mean compactness mean concavity mean concave points mean symmetry \\\n", "0 0.27760 0.300100 0.147100 0.2419 \n", "1 0.07864 0.086900 0.070170 0.1812 \n", "2 0.15990 0.197400 0.127900 0.2069 \n", "3 0.28390 0.241400 0.105200 0.2597 \n", "4 0.13280 0.198000 0.104300 0.1809 \n", "5 0.17000 0.157800 0.080890 0.2087 \n", "6 0.10900 0.112700 0.074000 0.1794 \n", "7 0.16450 0.093660 0.059850 0.2196 \n", "8 0.19320 0.185900 0.093530 0.2350 \n", "9 0.23960 0.227300 0.085430 0.2030 \n", "10 0.06669 0.032990 0.033230 0.1528 \n", "11 0.12920 0.099540 0.066060 0.1842 \n", "12 0.24580 0.206500 0.111800 0.2397 \n", "13 0.10020 0.099380 0.053640 0.1847 \n", "14 0.22930 0.212800 0.080250 0.2069 
\n", "15 0.15950 0.163900 0.073640 0.2303 \n", "16 0.07200 0.073950 0.052590 0.1586 \n", "17 0.20220 0.172200 0.102800 0.2164 \n", "18 0.10270 0.147900 0.094980 0.1582 \n", "19 0.08129 0.066640 0.047810 0.1885 \n", "20 0.12700 0.045680 0.031100 0.1967 \n", "21 0.06492 0.029560 0.020760 0.1815 \n", "22 0.21350 0.207700 0.097560 0.2521 \n", "23 0.10220 0.109700 0.086320 0.1769 \n", "24 0.14570 0.152500 0.091700 0.1995 \n", "25 0.22760 0.222900 0.140100 0.3040 \n", "26 0.18680 0.142500 0.087830 0.2252 \n", "27 0.10660 0.149000 0.077310 0.1697 \n", "28 0.16970 0.168300 0.087510 0.1926 \n", "29 0.11570 0.098750 0.079530 0.1739 \n", ".. ... ... ... ... \n", "539 0.11990 0.092520 0.013640 0.2037 \n", "540 0.11200 0.067370 0.025940 0.1818 \n", "541 0.12300 0.100900 0.038900 0.1872 \n", "542 0.07214 0.041050 0.030270 0.1840 \n", "543 0.06877 0.029870 0.032750 0.1628 \n", "544 0.10180 0.036880 0.023690 0.1620 \n", "545 0.06747 0.029740 0.024430 0.1664 \n", "546 0.04994 0.010120 0.005495 0.1885 \n", "547 0.08066 0.043580 0.024380 0.1669 \n", "548 0.05030 0.023370 0.009615 0.1580 \n", "549 0.06602 0.015480 0.008160 0.1976 \n", "550 0.04227 0.000000 0.000000 0.1661 \n", "551 0.08194 0.048240 0.022570 0.2030 \n", "552 0.04234 0.019970 0.014990 0.1539 \n", "553 0.05605 0.039960 0.012820 0.1692 \n", "554 0.05824 0.061950 0.023430 0.1566 \n", "555 0.07658 0.059990 0.027380 0.1593 \n", "556 0.07504 0.005025 0.011160 0.1791 \n", "557 0.04971 0.000000 0.000000 0.1742 \n", "558 0.13300 0.102900 0.037360 0.1454 \n", "559 0.10210 0.111200 0.041050 0.1388 \n", "560 0.11260 0.044620 0.043040 0.1537 \n", "561 0.03558 0.000000 0.000000 0.1060 \n", "562 0.20870 0.255000 0.094290 0.2128 \n", "563 0.22360 0.317400 0.147400 0.2149 \n", "564 0.11590 0.243900 0.138900 0.1726 \n", "565 0.10340 0.144000 0.097910 0.1752 \n", "566 0.10230 0.092510 0.053020 0.1590 \n", "567 0.27700 0.351400 0.152000 0.2397 \n", "568 0.04362 0.000000 0.000000 0.1587 \n", "\n", " mean fractal dimension ... 
worst texture worst perimeter \\\n", "0 0.07871 ... 17.33 184.60 \n", "1 0.05667 ... 23.41 158.80 \n", "2 0.05999 ... 25.53 152.50 \n", "3 0.09744 ... 26.50 98.87 \n", "4 0.05883 ... 16.67 152.20 \n", "5 0.07613 ... 23.75 103.40 \n", "6 0.05742 ... 27.66 153.20 \n", "7 0.07451 ... 28.14 110.60 \n", "8 0.07389 ... 30.73 106.20 \n", "9 0.08243 ... 40.68 97.65 \n", "10 0.05697 ... 33.88 123.80 \n", "11 0.06082 ... 27.28 136.50 \n", "12 0.07800 ... 29.94 151.70 \n", "13 0.05338 ... 27.66 112.00 \n", "14 0.07682 ... 32.01 108.80 \n", "15 0.07077 ... 37.13 124.10 \n", "16 0.05922 ... 30.88 123.40 \n", "17 0.07356 ... 31.48 136.80 \n", "18 0.05395 ... 30.88 186.80 \n", "19 0.05766 ... 19.26 99.70 \n", "20 0.06811 ... 20.49 96.09 \n", "21 0.06905 ... 15.66 65.13 \n", "22 0.07032 ... 19.08 125.10 \n", "23 0.05278 ... 35.59 188.00 \n", "24 0.06330 ... 31.56 177.00 \n", "25 0.07413 ... 21.40 152.40 \n", "26 0.06924 ... 33.21 122.40 \n", "27 0.05699 ... 27.26 139.90 \n", "28 0.06540 ... 36.71 149.30 \n", "29 0.06149 ... 19.52 134.90 \n", ".. ... ... ... ... \n", "539 0.07751 ... 31.89 54.49 \n", "540 0.06782 ... 19.68 78.78 \n", "541 0.06341 ... 31.73 113.50 \n", "542 0.05680 ... 32.29 107.40 \n", "543 0.05781 ... 37.17 92.48 \n", "544 0.06688 ... 24.75 99.17 \n", "545 0.05801 ... 29.09 97.58 \n", "546 0.06201 ... 21.77 71.12 \n", "547 0.06714 ... 22.04 71.08 \n", "548 0.06235 ... 25.59 69.10 \n", "549 0.06328 ... 31.45 83.90 \n", "550 0.05948 ... 24.77 74.08 \n", "551 0.06552 ... 28.26 77.80 \n", "552 0.05637 ... 36.00 88.10 \n", "553 0.06576 ... 25.05 62.86 \n", "554 0.05708 ... 35.74 88.84 \n", "555 0.06127 ... 34.91 69.57 \n", "556 0.06331 ... 22.88 67.88 \n", "557 0.06059 ... 34.24 66.50 \n", "558 0.06147 ... 27.27 105.90 \n", "559 0.06570 ... 37.16 82.28 \n", "560 0.06171 ... 33.17 100.20 \n", "561 0.05502 ... 38.30 75.19 \n", "562 0.07152 ... 42.79 128.70 \n", "563 0.06879 ... 29.41 179.10 \n", "564 0.05623 ... 26.40 166.10 \n", "565 0.05533 ... 
38.25 155.00 \n", "566 0.05648 ... 34.12 126.70 \n", "567 0.07016 ... 39.42 184.60 \n", "568 0.05884 ... 30.37 59.16 \n", "\n", " worst area worst smoothness worst compactness worst concavity \\\n", "0 2019.0 0.16220 0.66560 0.71190 \n", "1 1956.0 0.12380 0.18660 0.24160 \n", "2 1709.0 0.14440 0.42450 0.45040 \n", "3 567.7 0.20980 0.86630 0.68690 \n", "4 1575.0 0.13740 0.20500 0.40000 \n", "5 741.6 0.17910 0.52490 0.53550 \n", "6 1606.0 0.14420 0.25760 0.37840 \n", "7 897.0 0.16540 0.36820 0.26780 \n", "8 739.3 0.17030 0.54010 0.53900 \n", "9 711.4 0.18530 1.05800 1.10500 \n", "10 1150.0 0.11810 0.15510 0.14590 \n", "11 1299.0 0.13960 0.56090 0.39650 \n", "12 1332.0 0.10370 0.39030 0.36390 \n", "13 876.5 0.11310 0.19240 0.23220 \n", "14 697.7 0.16510 0.77250 0.69430 \n", "15 943.2 0.16780 0.65770 0.70260 \n", "16 1138.0 0.14640 0.18710 0.29140 \n", "17 1315.0 0.17890 0.42330 0.47840 \n", "18 2398.0 0.15120 0.31500 0.53720 \n", "19 711.2 0.14400 0.17730 0.23900 \n", "20 630.5 0.13120 0.27760 0.18900 \n", "21 314.9 0.13240 0.11480 0.08867 \n", "22 980.9 0.13900 0.59540 0.63050 \n", "23 2615.0 0.14010 0.26000 0.31550 \n", "24 2215.0 0.18050 0.35780 0.46950 \n", "25 1461.0 0.15450 0.39490 0.38530 \n", "26 896.9 0.15250 0.66430 0.55390 \n", "27 1403.0 0.13380 0.21170 0.34460 \n", "28 1269.0 0.16410 0.61100 0.63350 \n", "29 1227.0 0.12550 0.28120 0.24890 \n", ".. ... ... ... ... 
\n", "539 223.6 0.15960 0.30640 0.33930 \n", "540 457.8 0.13450 0.21180 0.17970 \n", "541 808.9 0.13400 0.42020 0.40400 \n", "542 826.4 0.10600 0.13760 0.16110 \n", "543 629.6 0.10720 0.13810 0.10620 \n", "544 688.6 0.12640 0.20370 0.13770 \n", "545 729.8 0.12160 0.15170 0.10490 \n", "546 384.9 0.12850 0.08842 0.04384 \n", "547 357.4 0.14610 0.22460 0.17830 \n", "548 364.2 0.11990 0.09546 0.09350 \n", "549 505.6 0.12040 0.16330 0.06194 \n", "550 412.3 0.10010 0.07348 0.00000 \n", "551 436.6 0.10870 0.17820 0.15640 \n", "552 594.7 0.12340 0.10640 0.08653 \n", "553 295.8 0.11030 0.08298 0.07993 \n", "554 595.7 0.12270 0.16200 0.24390 \n", "555 357.6 0.13840 0.17100 0.20000 \n", "556 347.3 0.12650 0.12000 0.01005 \n", "557 330.6 0.10730 0.07158 0.00000 \n", "558 733.5 0.10260 0.31710 0.36620 \n", "559 474.2 0.12980 0.25170 0.36300 \n", "560 706.7 0.12410 0.22640 0.13260 \n", "561 439.6 0.09267 0.05494 0.00000 \n", "562 915.0 0.14170 0.79170 1.17000 \n", "563 1819.0 0.14070 0.41860 0.65990 \n", "564 2027.0 0.14100 0.21130 0.41070 \n", "565 1731.0 0.11660 0.19220 0.32150 \n", "566 1124.0 0.11390 0.30940 0.34030 \n", "567 1821.0 0.16500 0.86810 0.93870 \n", "568 268.6 0.08996 0.06444 0.00000 \n", "\n", " worst concave points worst symmetry worst fractal dimension target \n", "0 0.26540 0.4601 0.11890 0 \n", "1 0.18600 0.2750 0.08902 0 \n", "2 0.24300 0.3613 0.08758 0 \n", "3 0.25750 0.6638 0.17300 0 \n", "4 0.16250 0.2364 0.07678 0 \n", "5 0.17410 0.3985 0.12440 0 \n", "6 0.19320 0.3063 0.08368 0 \n", "7 0.15560 0.3196 0.11510 0 \n", "8 0.20600 0.4378 0.10720 0 \n", "9 0.22100 0.4366 0.20750 0 \n", "10 0.09975 0.2948 0.08452 0 \n", "11 0.18100 0.3792 0.10480 0 \n", "12 0.17670 0.3176 0.10230 0 \n", "13 0.11190 0.2809 0.06287 0 \n", "14 0.22080 0.3596 0.14310 0 \n", "15 0.17120 0.4218 0.13410 0 \n", "16 0.16090 0.3029 0.08216 0 \n", "17 0.20730 0.3706 0.11420 0 \n", "18 0.23880 0.2768 0.07615 0 \n", "19 0.12880 0.2977 0.07259 1 \n", "20 0.07283 0.3184 0.08183 1 \n", "21 
0.06227 0.2450 0.07773 1 \n", "22 0.23930 0.4667 0.09946 0 \n", "23 0.20090 0.2822 0.07526 0 \n", "24 0.20950 0.3613 0.09564 0 \n", "25 0.25500 0.4066 0.10590 0 \n", "26 0.27010 0.4264 0.12750 0 \n", "27 0.14900 0.2341 0.07421 0 \n", "28 0.20240 0.4027 0.09876 0 \n", "29 0.14560 0.2756 0.07919 0 \n", ".. ... ... ... ... \n", "539 0.05000 0.2790 0.10660 1 \n", "540 0.06918 0.2329 0.08134 1 \n", "541 0.12050 0.3187 0.10230 1 \n", "542 0.10950 0.2722 0.06956 1 \n", "543 0.07958 0.2473 0.06443 1 \n", "544 0.06845 0.2249 0.08492 1 \n", "545 0.07174 0.2642 0.06953 1 \n", "546 0.02381 0.2681 0.07399 1 \n", "547 0.08333 0.2691 0.09479 1 \n", "548 0.03846 0.2552 0.07920 1 \n", "549 0.03264 0.3059 0.07626 1 \n", "550 0.00000 0.2458 0.06592 1 \n", "551 0.06413 0.3169 0.08032 1 \n", "552 0.06498 0.2407 0.06484 1 \n", "553 0.02564 0.2435 0.07393 1 \n", "554 0.06493 0.2372 0.07242 1 \n", "555 0.09127 0.2226 0.08283 1 \n", "556 0.02232 0.2262 0.06742 1 \n", "557 0.00000 0.2475 0.06969 1 \n", "558 0.11050 0.2258 0.08004 1 \n", "559 0.09653 0.2112 0.08732 1 \n", "560 0.10480 0.2250 0.08321 1 \n", "561 0.00000 0.1566 0.05905 1 \n", "562 0.23560 0.4089 0.14090 0 \n", "563 0.25420 0.2929 0.09873 0 \n", "564 0.22160 0.2060 0.07115 0 \n", "565 0.16280 0.2572 0.06637 0 \n", "566 0.14180 0.2218 0.07820 0 \n", "567 0.26500 0.4087 0.12400 0 \n", "568 0.00000 0.2871 0.07039 1 \n", "\n", "[569 rows x 31 columns]" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mean radiusmean texturemean perimetermean areamean smoothnessmean compactnessmean concavitymean concave pointsmean symmetrymean fractal dimension...worst textureworst perimeterworst areaworst smoothnessworst compactnessworst concavityworst concave pointsworst symmetryworst fractal dimensiontarget
017.9910.38122.801001.00.118400.277600.30010.147100.24190.07871...17.33184.602019.00.16220.66560.71190.26540.46010.118900
120.5717.77132.901326.00.084740.078640.08690.070170.18120.05667...23.41158.801956.00.12380.18660.24160.18600.27500.089020
219.6921.25130.001203.00.109600.159900.19740.127900.20690.05999...25.53152.501709.00.14440.42450.45040.24300.36130.087580
311.4220.3877.58386.10.142500.283900.24140.105200.25970.09744...26.5098.87567.70.20980.86630.68690.25750.66380.173000
420.2914.34135.101297.00.100300.132800.19800.104300.18090.05883...16.67152.201575.00.13740.20500.40000.16250.23640.076780
\n", "

5 rows × 31 columns

\n", "
" ], "text/plain": [ " mean radius mean texture mean perimeter mean area mean smoothness \\\n", "0 17.99 10.38 122.80 1001.0 0.11840 \n", "1 20.57 17.77 132.90 1326.0 0.08474 \n", "2 19.69 21.25 130.00 1203.0 0.10960 \n", "3 11.42 20.38 77.58 386.1 0.14250 \n", "4 20.29 14.34 135.10 1297.0 0.10030 \n", "\n", " mean compactness mean concavity mean concave points mean symmetry \\\n", "0 0.27760 0.3001 0.14710 0.2419 \n", "1 0.07864 0.0869 0.07017 0.1812 \n", "2 0.15990 0.1974 0.12790 0.2069 \n", "3 0.28390 0.2414 0.10520 0.2597 \n", "4 0.13280 0.1980 0.10430 0.1809 \n", "\n", " mean fractal dimension ... worst texture worst perimeter worst area \\\n", "0 0.07871 ... 17.33 184.60 2019.0 \n", "1 0.05667 ... 23.41 158.80 1956.0 \n", "2 0.05999 ... 25.53 152.50 1709.0 \n", "3 0.09744 ... 26.50 98.87 567.7 \n", "4 0.05883 ... 16.67 152.20 1575.0 \n", "\n", " worst smoothness worst compactness worst concavity worst concave points \\\n", "0 0.1622 0.6656 0.7119 0.2654 \n", "1 0.1238 0.1866 0.2416 0.1860 \n", "2 0.1444 0.4245 0.4504 0.2430 \n", "3 0.2098 0.8663 0.6869 0.2575 \n", "4 0.1374 0.2050 0.4000 0.1625 \n", "\n", " worst symmetry worst fractal dimension target \n", "0 0.4601 0.11890 0 \n", "1 0.2750 0.08902 0 \n", "2 0.3613 0.08758 0 \n", "3 0.6638 0.17300 0 \n", "4 0.2364 0.07678 0 \n", "\n", "[5 rows x 31 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(455, 30)\n", "(455, 1)\n", "(114, 30)\n", "(114, 1)\n" ] } ], "source": [ "# adaboost experiments\n", "# create x and y train\n", "X = df.drop('target', axis=1)\n", "y = df[['target']]\n", "\n", "# split data into train and test/validation sets\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=101)\n", "print(X_train.shape)\n", "print(y_train.shape)\n", "print(X_test.shape)\n", 
"print(y_test.shape)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "target 0.626374\n", "dtype: float64\n", "target 0.631579\n", "dtype: float64\n" ] } ], "source": [ "# check the average cancer occurence rates in train and test data, should be comparable\n", "print(y_train.mean())\n", "print(y_test.mean())" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# base estimator: a weak learner with max_depth=2\n", "shallow_tree = DecisionTreeClassifier(max_depth=2, random_state = 100)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.93859649122807021" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# fit the shallow decision tree \n", "shallow_tree.fit(X_train, y_train)\n", "\n", "# test error\n", "y_pred = shallow_tree.predict(X_test)\n", "score = metrics.accuracy_score(y_test, y_pred)\n", "score" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now, we will see the accuracy using the AdaBoost algorithm. In this following code, we will write code to calculate the accuracy of the AdaBoost models as we increase the number of trees from 1 to 50 with a step of 3 in the lines:\n", "\n", "'estimators = list(range(1, 50, 3))'\n", "\n", "'for n_est in estimators:'\n", "\n", "We finally end up with the accuracy of all the models in a single list abc_scores." 
] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# AdaBoost with the shallow tree as base estimator, for an increasing number of trees\n", "\n", "estimators = list(range(1, 50, 3))\n", "\n", "abc_scores = []\n", "for n_est in estimators:\n", "    # pin random_state: AdaBoost controls the seed handed to each boosted tree,\n", "    # so without it the test accuracies change from run to run\n", "    ABC = AdaBoostClassifier(\n", "        base_estimator=shallow_tree,\n", "        n_estimators=n_est,\n", "        random_state=100)\n", "\n", "    ABC.fit(X_train, y_train)\n", "    y_pred = ABC.predict(X_test)\n", "    score = metrics.accuracy_score(y_test, y_pred)\n", "    abc_scores.append(score)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.93859649122807021,\n", " 0.95614035087719296,\n", " 0.94736842105263153,\n", " 0.97368421052631582,\n", " 0.93859649122807021,\n", " 0.94736842105263153,\n", " 0.94736842105263153,\n", " 0.96491228070175439,\n", " 0.95614035087719296,\n", " 0.97368421052631582,\n", " 0.96491228070175439,\n", " 0.98245614035087714,\n", " 0.97368421052631582,\n", " 0.97368421052631582,\n", " 0.96491228070175439,\n", " 0.98245614035087714,\n", " 0.98245614035087714]" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "abc_scores" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# plot test accuracy against n_estimators\n", "plt.plot(estimators, abc_scores)\n", "plt.xlabel('n_estimators')\n", "plt.ylabel('accuracy')\n", "plt.ylim([0.85, 1])\n", "plt.show()" ] } ], "metadata": { "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.4" } }, "nbformat": 4, "nbformat_minor": 2 }