# Load the necessary packages
import numpy as np
import pandas as pd

# Mount Google Drive at /content/drive so files under it can be read by path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

# Load the Cardio Dataset
# Read the CardioGoodFitness CSV from the mounted Drive folder into a DataFrame.
mydata = pd.read_csv('/content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week_One_-_Python_for_Data_Science/CardioGood_Analysis/CardioGoodFitness.csv')

# Preview the first five rows.
mydata.head()

# Summary statistics for every column; include="all" also covers the
# non-numeric columns (unique/top/freq), leaving NaN where a statistic
# does not apply.
mydata.describe(include="all")

# Print column names, non-null counts, dtypes and memory usage.
mydata.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 180 entries, 0 to 179
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   Product        180 non-null    object
 1   Age            180 non-null    int64 
 2   Gender         180 non-null    object
 3   Education      180 non-null    int64 
 4   MaritalStatus  180 non-null    object
 5   Usage          180 non-null    int64 
 6   Fitness        180 non-null    int64 
 7   Income         180 non-null    int64 
 8   Miles          180 non-null    int64 
dtypes: int64(6), object(3)
memory usage: 12.8+ KB

import matplotlib.pyplot as plt
%matplotlib inline

# One histogram per numeric column, drawn on a single figure of figsize (20, 30).
mydata.hist(figsize=(20,30))

array([[<Axes: title={'center': 'Age'}>,
        <Axes: title={'center': 'Education'}>],
       [<Axes: title={'center': 'Usage'}>,
        <Axes: title={'center': 'Fitness'}>],
       [<Axes: title={'center': 'Income'}>,
        <Axes: title={'center': 'Miles'}>]], dtype=object)

import seaborn as sns #importing seaborn library

# Box plot of Age for each Gender.
sns.boxplot(x="Gender", y="Age", data=mydata)

<Axes: xlabel='Gender', ylabel='Age'>

# Box plot of Age for each Product.
sns.boxplot(x="Product", y="Age", data=mydata)

<Axes: xlabel='Product', ylabel='Age'>

# Frequency table: number of rows for each Product x Gender combination.
pd.crosstab(mydata['Product'],mydata['Gender'] )

# Frequency table: number of rows for each Product x MaritalStatus combination.
pd.crosstab(mydata['Product'],mydata['MaritalStatus'] )

# Bar chart of row counts per Product, split into one bar per Gender.
sns.countplot(x="Product", hue="Gender", data=mydata)

<Axes: xlabel='Product', ylabel='count'>

# No `values` given: len is applied to every remaining column, so each cell
# holds the row count for that (Product, Gender) row and MaritalStatus column.
pd.pivot_table(
    mydata,
    index=['Product', 'Gender'],
    columns=['MaritalStatus'],
    aggfunc=len,
)

# Income by (Product, Gender) and MaritalStatus, aggregated with
# pivot_table's default aggfunc (mean).
pd.pivot_table(
    mydata,
    values='Income',
    index=['Product', 'Gender'],
    columns=['MaritalStatus'],
)

# Miles in the same layout, again using the default mean aggregation.
pd.pivot_table(
    mydata,
    values='Miles',
    index=['Product', 'Gender'],
    columns=['MaritalStatus'],
)

# Grid of pairwise plots across the numeric columns of mydata.
sns.pairplot(mydata)

<seaborn.axisgrid.PairGrid at 0x797581eaef90>

# Standard deviation of Age (pandas uses the sample formula, ddof=1, by default).
mydata['Age'].std()

6.943498135399795

# Arithmetic mean of Age.
mydata['Age'].mean()

28.788888888888888

# Distribution plot of Age with a kernel density estimate curve overlaid.
sns.displot(data=mydata, x='Age', kde=True)

<seaborn.axisgrid.FacetGrid at 0x797581000b10>

# Histograms of Age, one subplot per Gender value.
mydata.hist(by='Gender',column = 'Age')

array([<Axes: title={'center': 'Female'}>,
       <Axes: title={'center': 'Male'}>], dtype=object)

# Histograms of Income, one subplot per Gender value.
mydata.hist(by='Gender',column = 'Income')

array([<Axes: title={'center': 'Female'}>,
       <Axes: title={'center': 'Male'}>], dtype=object)

# Histograms of Miles, one subplot per Gender value.
mydata.hist(by='Gender',column = 'Miles')

array([<Axes: title={'center': 'Female'}>,
       <Axes: title={'center': 'Male'}>], dtype=object)

# Histograms of Miles, one subplot per Product value, on a figure of figsize (20, 30).
mydata.hist(by='Product',column = 'Miles', figsize=(20,30))

array([[<Axes: title={'center': 'TM195'}>,
        <Axes: title={'center': 'TM498'}>],
       [<Axes: title={'center': 'TM798'}>, <Axes: >]], dtype=object)

# Select only the numerical columns before calculating the correlation.
numerical_data = mydata.select_dtypes(include=['number'])
# Pairwise correlation matrix of those columns (DataFrame.corr defaults to Pearson).
corr = numerical_data.corr()
corr

# Heatmap of the correlation matrix with each coefficient printed in its cell.
sns.heatmap(corr, annot=True)

<Axes: >

# Simple Linear Regression

# Load the linear-model estimators from scikit-learn. The distribution is
# called "scikit-learn", but the importable package is `sklearn`; a hyphen is
# not valid in a module name, so `from Scikit-learn import ...` cannot parse.
from sklearn import linear_model

# Create linear regression object
regr = linear_model.LinearRegression()

# Response variable: Miles. Predictors: Usage and Fitness.
y = mydata['Miles']
x = mydata[['Usage','Fitness']]

# Fit the model on the full dataset (no train/test split is made here).
regr.fit(x,y)

LinearRegression()

LinearRegression()

# Fitted slopes, in the column order of x (Usage, then Fitness).
print(f"Coefficients: {regr.coef_}")

Coefficients: [20.21486334 27.20649954]

# Fitted intercept (the constant term of the linear model).
print(f"Intercept: {regr.intercept_}")

Intercept: -56.74288178464862

# MilesPredicted = -56.74 + 20.21*Usage + 27.21*Fitness

# Convert notebook to html
!jupyter nbconvert --to html "/content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week_One_-_Python_for_Data_Science/CardioGood_Analysis/Notebook+-+CardioGood+Fitness+Data+Analysis.ipynb"

[NbConvertApp] Converting notebook /content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week_One_-_Python_for_Data_Science/CardioGood_Analysis/Notebook+-+CardioGood+Fitness+Data+Analysis.ipynb to html
[NbConvertApp] WARNING | Alternative text is missing on 10 image(s).
[NbConvertApp] Writing 891271 bytes to /content/drive/MyDrive/MIT - Data Sciences/Colab Notebooks/Week_One_-_Python_for_Data_Science/CardioGood_Analysis/Notebook+-+CardioGood+Fitness+Data+Analysis.html

	Product	Age	Gender	Education	MaritalStatus	Usage	Fitness	Income	Miles
count	180	180.000000	180	180.000000	180	180.000000	180.000000	180.000000	180.000000
unique	3	NaN	2	NaN	2	NaN	NaN	NaN	NaN
top	TM195	NaN	Male	NaN	Partnered	NaN	NaN	NaN	NaN
freq	80	NaN	104	NaN	107	NaN	NaN	NaN	NaN
mean	NaN	28.788889	NaN	15.572222	NaN	3.455556	3.311111	53719.577778	103.194444
std	NaN	6.943498	NaN	1.617055	NaN	1.084797	0.958869	16506.684226	51.863605
min	NaN	18.000000	NaN	12.000000	NaN	2.000000	1.000000	29562.000000	21.000000
25%	NaN	24.000000	NaN	14.000000	NaN	3.000000	3.000000	44058.750000	66.000000
50%	NaN	26.000000	NaN	16.000000	NaN	3.000000	3.000000	50596.500000	94.000000
75%	NaN	33.000000	NaN	16.000000	NaN	4.000000	4.000000	58668.000000	114.750000
max	NaN	50.000000	NaN	21.000000	NaN	7.000000	5.000000	104581.000000	360.000000

	Age	Education	Usage	Fitness	Income	Miles
Age	1.000000	0.280496	0.015064	0.061105	0.513414	0.036618
Education	0.280496	1.000000	0.395155	0.410581	0.625827	0.307284
Usage	0.015064	0.395155	1.000000	0.668606	0.519537	0.759130
Fitness	0.061105	0.410581	0.668606	1.000000	0.535005	0.785702
Income	0.513414	0.625827	0.519537	0.535005	1.000000	0.543473
Miles	0.036618	0.307284	0.759130	0.785702	0.543473	1.000000

CardioGood Fitness Case Study - Descriptive Statistics

The team identifies the following customer variables to study: Product, Age, Gender, Education, MaritalStatus, Usage, Fitness, Income, and Miles.

Perform descriptive analytics to create a customer profile for each CardioGood Fitness treadmill product line.

	Product	Age	Gender	Education	MaritalStatus	Usage	Fitness	Income	Miles
0	TM195	18	Male	14	Single	3	4	29562	112
1	TM195	19	Male	15	Single	2	3	31836	75
2	TM195	19	Female	14	Partnered	4	3	30699	66
3	TM195	19	Male	12	Single	3	3	32973	85
4	TM195	20	Male	13	Partnered	4	2	35247	47

	MaritalStatus	Partnered	Single
Product	Gender
TM195	Female	46153.777778	45742.384615
TM195	Male	50028.000000	43265.842105
TM498	Female	49724.800000	48920.357143
TM498	Male	49378.285714	47071.800000
TM798	Female	84972.250000	58516.000000
TM798	Male	81431.368421	68216.428571

	MaritalStatus	Partnered	Single
Product	Gender
TM195	Female	74.925926	78.846154
TM195	Male	80.190476	99.526316
TM498	Female	94.000000	80.214286
TM498	Male	87.238095	91.100000
TM798	Female	215.000000	133.333333
TM798	Male	176.315789	147.571429