Permalink
Browse files

Push updates to blank notebooks.

  • Loading branch information...
ljhopkins2 committed Dec 2, 2019
1 parent 77d4a89 commit 372dd82489fc66d0c17179a7f1a46030c5f9c15f
Showing with 28 additions and 158 deletions.
  1. BIN data/pipeline.pkl
  2. BIN data/things_to_pickle.pkl
  3. +18 −108 intro_to_pickle.ipynb
  4. +10 −50 read_a_pickle.ipynb
BIN +0 Bytes (100%) data/pipeline.pkl
Binary file not shown.
Binary file not shown.
@@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -60,16 +60,16 @@
"\n",
"Before we pickle a full model, let's demonstrate pickling on a simple list.\n",
"\n",
"Create a list called `my_vegetables` that contains some strings:"
"Create a list called `things_to_pickle` that contains some strings:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"my_vegetables = [\"cucumbers\", \"pigs\\' feet\", \"beets\", \"a peck of peppers\"]"
"things_to_pickle = [\"cucumbers\", \"pigs\\' feet\", \"beets\", \"a peck of peppers\"]"
]
},
{
@@ -85,13 +85,10 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('data/veggies_et_al.pkl','wb') as pickle_out:\n",
" pickle.dump(my_vegetables, pickle_out)"
]
"source": []
},
{
"cell_type": "markdown",
@@ -104,33 +101,17 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('data/veggies_et_al.pkl', 'rb') as pickle_in:\n",
" list_from_pickle = pickle.load(pickle_in)"
]
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['cucumbers', \"pigs' feet\", 'beets', 'a peck of peppers']"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"list_from_pickle"
]
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
@@ -153,66 +134,9 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>text</th>\n",
" <th>handle</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>The question in this election: Who can put the...</td>\n",
" <td>HillaryClinton</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>If we stand together, there's nothing we can't...</td>\n",
" <td>HillaryClinton</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Both candidates were asked about how they'd co...</td>\n",
" <td>HillaryClinton</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" text handle\n",
"0 The question in this election: Who can put the... HillaryClinton\n",
"3 If we stand together, there's nothing we can't... HillaryClinton\n",
"4 Both candidates were asked about how they'd co... HillaryClinton"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"df = pd.read_csv('data/trump_clinton_tweets.csv')\n",
"df = df[df['is_retweet'] == False][['text', 'handle']]\n",
@@ -228,7 +152,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -247,20 +171,9 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.9937077604288045, 0.9217330538085255)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"pipe = Pipeline([\n",
" ('cv', CountVectorizer(min_df=3)),\n",
@@ -282,13 +195,10 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('data/pipeline.pkl', 'wb') as pickle_out:\n",
" pickle.dump(pipe, pickle_out)"
]
"source": []
},
{
"cell_type": "markdown",
@@ -11,12 +11,10 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pickle"
]
"source": []
},
{
"cell_type": "markdown",
@@ -27,13 +25,10 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"with open('data/pipeline.pkl', 'rb') as pickle_in:\n",
" pipe = pickle.load(pickle_in)"
]
"source": []
},
{
"cell_type": "markdown",
@@ -44,23 +39,10 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1])"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe.predict(['MAGA!'])"
]
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
@@ -71,32 +53,10 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['york',\n",
" 'yorkers',\n",
" 'you',\n",
" 'young',\n",
" 'your',\n",
" 'yourself',\n",
" 'youtube',\n",
" 'zero',\n",
" 'zika',\n",
" 'zuckerman']"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pipe.named_steps['cv'].get_feature_names()[-10:]"
]
"outputs": [],
"source": []
},
{
"cell_type": "markdown",

0 comments on commit 372dd82

Please sign in to comment.