Permalink
Browse files

Push final project to GitHub

  • Loading branch information...
paulshoop committed Dec 5, 2018
0 parents commit 4354086df9cde493685cab4e460364c591eecbd5
Showing with 379,654 additions and 0 deletions.
  1. BIN Presentation/image_files/2009_class_counts.png
  2. BIN Presentation/image_files/2009_class_counts_clear.png
  3. BIN Presentation/image_files/class_time.png
  4. BIN Presentation/image_files/class_time_clear.png
  5. BIN Presentation/image_files/coefs.png
  6. BIN Presentation/image_files/coefs_clear.png
  7. BIN Presentation/image_files/coefsx.png
  8. BIN Presentation/image_files/coefsx_clear.png
  9. BIN Presentation/image_files/combo_cyl_disp_mpg.png
  10. BIN Presentation/image_files/combo_cyl_disp_mpg_clear.png
  11. BIN Presentation/image_files/disp_cyl_co2_mpg.png
  12. BIN Presentation/image_files/disp_cyl_co2_mpg_clear.png
  13. BIN Presentation/image_files/mpg_time.png
  14. BIN Presentation/image_files/mpg_time_clear.png
  15. BIN Presentation/image_files/smog1.png
  16. BIN Presentation/image_files/smog1_clear.png
  17. BIN Presentation/image_files/smog2.png
  18. BIN Presentation/image_files/smog2_clear.png
  19. BIN Presentation/image_files/t10_disp_cyl_co2_mpg.png
  20. BIN Presentation/image_files/t10_disp_cyl_co2_mpg_clear.png
  21. BIN Presentation/image_files/top10_mpg.png
  22. BIN Presentation/image_files/top10_mpg_clear.png
  23. +804 −0 PythonNotebooks/.ipynb_checkpoints/EDA-checkpoint.ipynb
  24. +547 −0 PythonNotebooks/.ipynb_checkpoints/EDA2-checkpoint.ipynb
  25. +329 −0 PythonNotebooks/.ipynb_checkpoints/User_Pred-checkpoint.ipynb
  26. +298 −0 PythonNotebooks/.ipynb_checkpoints/User_Pred2-checkpoint.ipynb
  27. +2,286 −0 PythonNotebooks/.ipynb_checkpoints/data_cleaning-checkpoint.ipynb
  28. +1,679 −0 PythonNotebooks/.ipynb_checkpoints/lin_reg-checkpoint.ipynb
  29. +1,244 −0 PythonNotebooks/.ipynb_checkpoints/lin_reg_year_filt-checkpoint.ipynb
  30. +1,235 −0 PythonNotebooks/.ipynb_checkpoints/top10_lin_reg-checkpoint.ipynb
  31. +2,286 −0 PythonNotebooks/Data_Cleaning.ipynb
  32. +804 −0 PythonNotebooks/EDA.ipynb
  33. +547 −0 PythonNotebooks/EDA2.ipynb
  34. +1,679 −0 PythonNotebooks/Linear_Regression.ipynb
  35. +329 −0 PythonNotebooks/User_Pred.ipynb
  36. +30 −0 Readme.md
  37. +560 −0 datasets/X_scaled.csv
  38. +40,522 −0 datasets/allcars_df.csv
  39. +39,826 −0 datasets/mpg.csv
  40. +39,826 −0 datasets/mpg_cats.csv
  41. +39,826 −0 datasets/mpg_fin.csv
  42. +39,826 −0 datasets/mpg_scaled
  43. +39,826 −0 datasets/mpg_scaled.csv
  44. +21,160 −0 datasets/top10_cats.csv
  45. +21,160 −0 datasets/top10_df.csv
  46. +21,160 −0 datasets/top10_fin.csv
  47. +21,160 −0 datasets/top10_scaled.csv
  48. +40,705 −0 datasets/vehicles.csv
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

Large diffs are not rendered by default.

Oops, something went wrong.

Large diffs are not rendered by default.

Oops, something went wrong.
@@ -0,0 +1,329 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<script>\n",
"code_show=true; \n",
"function code_toggle() {\n",
" if (code_show){\n",
" $('div.input').hide();\n",
" } else {\n",
" $('div.input').show();\n",
" }\n",
" code_show = !code_show\n",
"} \n",
"$( document ).ready(code_toggle);\n",
"</script>\n",
"<form action=\"javascript:code_toggle()\"><input type=\"submit\" value=\"Click here to toggle on/off the raw code.\"></form>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython.display import HTML\n",
"\n",
"HTML('''<script>\n",
"code_show=true; \n",
"function code_toggle() {\n",
" if (code_show){\n",
" $('div.input').hide();\n",
" } else {\n",
" $('div.input').show();\n",
" }\n",
" code_show = !code_show\n",
"} \n",
"$( document ).ready(code_toggle);\n",
"</script>\n",
"<form action=\"javascript:code_toggle()\"><input type=\"submit\" value=\"Click here to toggle on/off the raw code.\"></form>''')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# input all the things\n",
"import pandas as pd \n",
"import numpy as np \n",
"from sklearn import metrics\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.model_selection import cross_val_score, cross_val_predict\n",
"from sklearn.metrics import r2_score\n",
"from sklearn import model_selection\n",
"from sklearn.linear_model import Ridge\n",
"from sklearn.preprocessing import StandardScaler\n",
"import scipy.stats as stats\n",
"import math\n",
"from IPython.display import Markdown, Javascript, display\n",
"from ipywidgets import widgets\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# read in data\n",
"mpg_df = pd.read_csv('../datasets/mpg_fin.csv')\n",
"mpg_not_scaled = pd.read_csv('../datasets/mpg_cats.csv')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# The function that turns object features into categories then into numeric values\n",
"def cat_code(df):\n",
" l_o_types = list(df.dtypes)\n",
" col_names = list(df.columns)\n",
" zip_list = list(zip(col_names, l_o_types))\n",
" \n",
" for i in range(len(zip_list)):\n",
" if zip_list[i][1] == 'object':\n",
" feat = zip_list[i][0]\n",
" df[feat] = df[feat].astype('category')\n",
" i += 1\n",
" \n",
" cat_columns = df.select_dtypes(['category']).columns\n",
" cat_columns\n",
" \n",
" df[cat_columns] = df[cat_columns].apply(lambda x: x.cat.codes)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# The function that will build and fit models to predict MPG based on user input 'year' and values\n",
"def user_predict(year, vals):\n",
" \n",
" # Build my data frame and target variable for the ridge/linear regression models\n",
" X = mpg_df[mpg_df['year'] == year][['vehicle_class', 'cylinders',\n",
" 'displacement', 'fuel_type']] \n",
" y = mpg_df[mpg_df['year'] == year].combMPG\n",
" \n",
" # send the data frame to the cat_code function to change strings to numbers\n",
" cat_code(X)\n",
" \n",
" # instantiate the ridge/linear models\n",
" ridgereg = Ridge(alpha=.1)\n",
" linreg = LinearRegression()\n",
" \n",
" # Fit the ridge/linear models\n",
" model = ridgereg.fit(X, y)\n",
" model2 = linreg.fit(X, y)\n",
" \n",
" # make predictions based on user input values\n",
" pred = ridgereg.predict([vals])\n",
" pred2 = linreg.predict([vals])\n",
" \n",
" # print results\n",
" display(Markdown('<br/><strong>The predicted MPG is approximately:</strong>'))\n",
" display(Markdown('**{}** or **{}**'.format(pred, pred2)))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# A function that takes the user input 'year' and creates data frames/lists of valid variable values\n",
"# the results are printed\n",
"def get_vars(year):\n",
" # Get valid vehicle class values by using 'year' to filter data then build new data frame\n",
" classes = list(zip(mpg_not_scaled[mpg_not_scaled['year'] == year].vehicle_class.unique(), \n",
" mpg_df[mpg_df['year'] == year].vehicle_class.unique()))\n",
" classes.sort(key=lambda tup: tup[0])\n",
" classes_df = pd.DataFrame(classes, columns = ['number', 'class'])\n",
" \n",
" # Get valid fuel values by using 'year' to filter data then build new data frame\n",
" fuels = list(zip(mpg_not_scaled[mpg_not_scaled['year'] == year].fuel_type.unique(), \n",
" mpg_df[mpg_df['year'] == year].fuel_type.unique()))\n",
" fuels.sort(key=lambda tup: tup[0])\n",
" fuels_df = pd.DataFrame(fuels, columns = ['number', 'fuel type'])\n",
" \n",
" # display the results\n",
" display(Markdown('<br/><strong>The available vehicle classes are:</strong>'))\n",
" display(classes_df)\n",
" display(Markdown('<br/><strong>The available cylinders are:</strong>'))\n",
" print(sorted(mpg_df.cylinders.unique())) # here I am creating a list of valid cylinders values\n",
" display(Markdown('<br/><strong>The available diplacements are:</strong>'))\n",
" print(sorted(mpg_df.displacement.unique())) # here I am creating a list of valid displacement values\n",
" display(Markdown('<br/><strong>The available fuel types are:</strong>'))\n",
" display(fuels_df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img src=\"https://dannysengineportal.com/common-causes-of-poor-fuel-economy/fuel-efficiency-road-sign-illustration-design/\" style=\"float: left; margin: 40px; height: 300px\">"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# User MPG Prediction\n",
"\n",
"This form can be used in order to predict miles per gallon given a few variables. \n",
"\n",
"Through researching available EPA data, the primary variables affecting fuel efficiency are:\n",
"* vehicle type (or class)\n",
"* cylinders\n",
"* displacement\n",
"* fuel type\n",
"\n",
"If you'd like to view my research, you can find relevant programming notebooks here:\n",
"* future home of github link\n",
" \n",
"If you'd like to get the data for yourself, you can find it here:\n",
"http://www.fueleconomy.gov/feg/epadata/vehicles.csv.zip\n",
"\n",
"The data dictionary is here:\n",
"http://www.fueleconomy.gov/feg/ws/index.shtml#vehicle\n",
"\n",
"## To use this program, enter a year. You'll be given a list of valid vehicle classes, cylinders, displacements, and fuel types.\n",
"\n",
"#### Enter the information as follows: (vehicle code cylinders displacement fuel code) \n",
"#### Don't use commas ... \n",
"\n",
"For example:\n",
"0 6 3.6 8"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# simple function to run cells \n",
"def run_all(ev):\n",
" display(Javascript('IPython.notebook.execute_cells_below()'))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"IPython.notebook.execute_cells_below()"
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "fcbb2ac8485e4448b379ee886eb29821",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Button(description='GO', style=ButtonStyle())"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "08fc110c1d4e408887f110a0c85a7dc4",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Button(description='GO', style=ButtonStyle())"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# a function to create a button in order to runt the code below\n",
"button = widgets.Button(description=\"GO\")\n",
"button.on_click(run_all)\n",
"display(button)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"year_input = int(input(\"Enter a year between 1985 and 2018\"))\n",
"\n",
"get_vars(year_input)\n",
"\n",
"features = input(\"Input chosen features from list above separated by a space. No commas or quotes: \")\n",
"featured_cols = features.split() # required step to get input into a usable list\n",
"\n",
"featured_cols = [float(i) for i in featured_cols]\n",
"user_predict(year_input, featured_cols)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Oops, something went wrong.

0 comments on commit 4354086

Please sign in to comment.