Skip to content

Commit cf0288e

Browse files
CarmenCarmen
Carmen
authored and
Carmen
committed
first docs
0 parents  commit cf0288e

6 files changed

+3474
-0
lines changed

Capstone_milestone_report.pdf

549 KB
Binary file not shown.
63.5 KB
Binary file not shown.

analysis_data.pdf

29.5 KB
Binary file not shown.

data_reading_and_wrangling.ipynb

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"metadata": {},
6+
"source": [
7+
"<b style=\"font-size:150%;\"> Data wrangling </b> \n",
8+
"\n",
9+
"<p style=\"font-size:120%;\"> \n",
10+
"I have one year of measurements of the hard disks, where each snapshot \n",
11+
"corresponds to one day of data.<br>\n",
12+
"This script joins all the daily files into a single file and keeps only the columns that will be used in the analysis. It also removes unphysical data (rows whose capacity is not positive) and converts the date column into datetime format. </p> "
13+
]
14+
},
15+
{
16+
"cell_type": "code",
17+
"execution_count": 1,
18+
"metadata": {
19+
"collapsed": true
20+
},
21+
"outputs": [],
22+
"source": [
23+
"import numpy as np\n",
24+
"import pandas as pd\n",
25+
"import glob\n"
26+
]
27+
},
28+
{
29+
"cell_type": "code",
30+
"execution_count": 2,
31+
"metadata": {
32+
"collapsed": false
33+
},
34+
"outputs": [],
35+
"source": [
36+
"column_names=['date', 'serial_number', 'model', 'capacity_bytes', 'failure', \n",
37+
" 'smart_1_normalized', 'smart_1_raw','smart_3_normalized', 'smart_3_raw',\n",
38+
" 'smart_5_normalized', 'smart_5_raw','smart_9_normalized', 'smart_9_raw', \n",
39+
" 'smart_12_normalized', 'smart_12_raw','smart_194_normalized', 'smart_194_raw', ]\n",
40+
"files= glob.glob('2015*.csv')\n",
41+
"data= pd.concat([pd.read_csv(i, usecols= column_names) for i in files], \n",
42+
" ignore_index=True)\n",
43+
"# data cleaning\n",
44+
"data= data[data.capacity_bytes>0]\n",
45+
"data['date']= pd.to_datetime(data['date'], errors= 'coerce')\n"
46+
]
47+
},
48+
{
49+
"cell_type": "code",
50+
"execution_count": 3,
51+
"metadata": {
52+
"collapsed": true
53+
},
54+
"outputs": [],
55+
"source": [
56+
"data.to_csv('hard_drive_data_2015.csv')"
57+
]
58+
},
59+
{
60+
"cell_type": "code",
61+
"execution_count": null,
62+
"metadata": {
63+
"collapsed": false
64+
},
65+
"outputs": [],
66+
"source": []
67+
},
68+
{
69+
"cell_type": "code",
70+
"execution_count": null,
71+
"metadata": {
72+
"collapsed": true
73+
},
74+
"outputs": [],
75+
"source": []
76+
}
77+
],
78+
"metadata": {
79+
"anaconda-cloud": {},
80+
"kernelspec": {
81+
"display_name": "Python [Root]",
82+
"language": "python",
83+
"name": "Python [Root]"
84+
},
85+
"language_info": {
86+
"codemirror_mode": {
87+
"name": "ipython",
88+
"version": 3
89+
},
90+
"file_extension": ".py",
91+
"mimetype": "text/x-python",
92+
"name": "python",
93+
"nbconvert_exporter": "python",
94+
"pygments_lexer": "ipython3",
95+
"version": "3.5.2"
96+
}
97+
},
98+
"nbformat": 4,
99+
"nbformat_minor": 0
100+
}

0 commit comments

Comments
 (0)