Skip to content

Commit 81ad947

Browse files
committed
california population projection and education project
1 parent bb0bcac commit 81ad947

10 files changed

+2285
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
/* Create the ca_population schema and make it the default schema */
CREATE SCHEMA ca_population;
USE ca_population;

/*
   pop_proj: one row per loaded CSV record.
   Every column is declared NOT NULL.
*/
CREATE TABLE pop_proj (
    county_code  VARCHAR(45)  NOT NULL,
    county_name  VARCHAR(45)  NOT NULL,
    date_year    INT          NOT NULL,
    race_code    INT          NOT NULL,
    race         TEXT         NOT NULL,
    gender       VARCHAR(6)   NOT NULL,
    age          INT          NOT NULL,
    population   INT          NOT NULL
);
15+
16+
/*
   Load Data
   Reads the CSV from the client machine into pop_proj.
   - fields are separated by ',' and use '"' as the enclosing character
   - rows end with '\n'
   - the first line (the header row) is ignored
   NOTE(review): the path is a Windows path; if the file uses '\r\n' line
   endings the LINES terminator may need adjusting -- confirm against the file.
*/
LOAD DATA LOCAL INFILE 'C:\\Users\\User\\CA_DRU_proj_2010-2060.csv'
    INTO TABLE pop_proj
    FIELDS
        TERMINATED BY ','
        ENCLOSED BY '"'
    LINES
        TERMINATED BY '\n'
    IGNORE 1 LINES;
24+
25+
/* Sanity check: look at ten of the loaded rows (all columns, in table order) */
SELECT
    county_code,
    county_name,
    date_year,
    race_code,
    race,
    gender,
    age,
    population
FROM pop_proj
LIMIT 10;
28+
29+
/* SIDE NOTE */
30+
/*
31+
If the load fails because loading local data is disabled, we need to enable it.
32+
33+
Ref: https://stackoverflow.com/questions/59993844/error-loading-local-data-is-disabled-this-must-be-enabled-on-both-the-client
34+
35+
Ref: if you can't access mysql from cmd, add mysql path in path first https://www.qualitestgroup.com/resources/knowledge-center/how-to-guide/add-mysql-path-windows/
36+
37+
1) log in to mysql from the command line >> mysql -u <username> -p
38+
2) check the current status of the local_infile variable >> show global variables like 'local_infile';
39+
3) if that is OFF, enable it >> SET GLOBAL local_infile=1;
40+
4) quit the server >> quit
41+
5) connect to server again >> mysql --local_infile=1 -u root -p
42+
6) run the load sql statement.
43+
*/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* Performance: add an index on pop_proj.county_name */
CREATE INDEX county_name ON pop_proj (county_name);
4+
5+
/*
   Initial list of male and female populations per county for 2014:
   one row per (county, gender) with the summed population.
*/
SELECT
    pp.county_name,
    pp.gender,
    SUM(pp.population) AS total_population
FROM pop_proj AS pp
WHERE pp.date_year = 2014
GROUP BY
    pp.county_name,
    pp.gender
ORDER BY pp.county_name;
11+
12+
/*
   Male and female populations per county for 2014, pivoted so that each
   county is a single row with one column per gender.

   Output columns: county_name, Male, Female.

   Conditional aggregation is used instead of joining pop_proj to a
   per-county "Female" derived table: the table is read once, every
   selected column is either grouped or aggregated, and a county that has
   rows for only one of the two genders is still listed (with 0 for the
   other) rather than being dropped by the inner join.
*/
SELECT
    county_name,
    SUM(CASE WHEN gender = 'Male'   THEN population ELSE 0 END) AS Male,
    SUM(CASE WHEN gender = 'Female' THEN population ELSE 0 END) AS Female
FROM pop_proj
WHERE date_year = 2014
  AND gender IN ('Male', 'Female')  -- only the two genders the pivot reports
GROUP BY county_name
ORDER BY county_name;
27+
28+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
https://data.gov.uk/dataset/cb7ae6f0-4be6-4935-9277-47e5ce24a11f/road-safety-data
2+
3+
4+
https://data.ca.gov/dataset/ca-educational-attainment-personal-income
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
/* Work inside the ca_population schema */
USE ca_population;

/*
   ca_edu: target table for the education / personal income CSV.
   All columns are nullable; only gender and population are non-TEXT.
*/
CREATE TABLE ca_edu (
    date_year       TEXT,
    age             TEXT,
    gender          VARCHAR(6),
    edu_attainment  TEXT,
    income          TEXT,
    population      INT
);
11+
12+
13+
/*
   Load data
   Client invocation noted by the author for running this load:
       mysql --local_infile=1 -u root -p
   The CSV is comma-separated, uses '"' as the enclosing character, has
   '\n'-terminated rows, and its first line is skipped.
*/
LOAD DATA LOCAL INFILE 'C:\\Users\\User\\cleaned_CA_Educational_Attainment___Personal_Income_2008-2014.csv'
    INTO TABLE ca_edu
    FIELDS
        TERMINATED BY ','
        ENCLOSED BY '"'
    LINES
        TERMINATED BY '\n'
    IGNORE 1 LINES;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
   What is the percentage of education attainment for each category
   across different age groups?

   For every (age, edu_attainment) pair, divide that pair's summed
   population by the summed population of the whole age group.

   Output columns: age, edu_attainment, coefficient.
   The alias is spelled "coefficient" to match the demographics table
   built from this same query.
*/
SELECT
    ca_edu.age,
    ca_edu.edu_attainment,
    SUM(ca_edu.population) / total_pop_by_age.total_population AS coefficient
FROM ca_edu
INNER JOIN (
    -- total population per age group (the denominator)
    SELECT
        age,
        SUM(population) AS total_population
    FROM ca_edu
    GROUP BY age
) AS total_pop_by_age
    ON ca_edu.age = total_pop_by_age.age
GROUP BY
    ca_edu.age,
    ca_edu.edu_attainment,
    -- one value per age group; listed so every non-aggregated select
    -- expression is covered by the GROUP BY
    total_pop_by_age.total_population;
13+
14+
15+
/*
   Materialise the attainment share per (age, edu_attainment) as the
   demographics table. Resulting columns: age, edu_attainment, coefficient.
*/
CREATE TABLE demographics AS
SELECT
    edu.age,
    edu.edu_attainment,
    SUM(edu.population) / age_totals.total_population AS coefficient
FROM ca_edu AS edu
JOIN (
    -- summed population of each age group
    SELECT
        age,
        SUM(population) AS total_population
    FROM ca_edu
    GROUP BY age
) AS age_totals
    ON edu.age = age_totals.age
GROUP BY
    edu.age,
    edu.edu_attainment;
28+
29+
30+
/*
   Using Population Projection data,
   what is the projection of education demand for each age group?

   Steps:
     1. temp_pop sums pop_proj.population per (date_year, age).
     2. Each numeric age is mapped to the age-group label stored in
        demographics.age:
            age <  18  -> '00 to 17'
            age <  65  -> '18 to 64'
            otherwise  -> '65 to 80+'
     3. Projected population is multiplied by the matching attainment
        coefficient, summed per year and education level, and rounded.

   Output columns: Year, Education, Demand.

   Fix: the original CASE sent ages 18-63 to '65 to 80+' and ages 64 and
   over to '18 to 64' (labels swapped, boundary off by one).
*/
SELECT
    temp_pop.date_year          AS `Year`,
    demographics.edu_attainment AS `Education`,
    ROUND(SUM(temp_pop.total_pop * demographics.coefficient)) AS `Demand`
FROM (
    SELECT
        date_year,
        age,
        SUM(population) AS total_pop
    FROM pop_proj
    GROUP BY
        date_year,
        age
) AS temp_pop
INNER JOIN demographics
    ON demographics.age = CASE
        WHEN temp_pop.age < 18 THEN '00 to 17'
        WHEN temp_pop.age < 65 THEN '18 to 64'
        ELSE '65 to 80+'
    END
GROUP BY
    temp_pop.date_year,
    demographics.edu_attainment;
49+
50+
51+

Database Clinics - MySQL/04.California Education/datasets/cleaned_CA_Educational_Attainment___Personal_Income_2008-2014.csv

+1,061
Large diffs are not rendered by default.

Database Clinics - MySQL/04.California Education/datasets/original/CA_Educational_Attainment___Personal_Income_2008-2014.csv

+1,061
Large diffs are not rendered by default.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
https://data.gov.uk/dataset/cb7ae6f0-4be6-4935-9277-47e5ce24a11f/road-safety-data
2+
3+
4+
https://data.ca.gov/dataset/ca-educational-attainment-personal-income

readme.md

+12
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,18 @@ SQLite project for
6262
+ **[At the Library](SQL%20Code%20Challenges%20-%20Lynda/At%20the%20Library)** - helping the Library to manage the books statistics, check out and loan process, managing on due loans, encouraging patrons, etc.
6363

6464

65+
## [California Population Project](Database%20Clinics%20-%20MySQL/01.California%20Population%20Projection)
66+
MySQL project for
67+
+ getting population data and loading for data analysis
68+
+ finding out male and female populations per county for 2014
69+
70+
## [California Education Project](Database%20Clinics%20-%20MySQL/04.California%20Education)
71+
MySQL project for
72+
+ initial cleaning for education data
73+
+ finding the percentage of education attainment for each category across different age groups
74+
+ getting demographics based on education data and age group
75+
+ finding out projection of education demand for each age group based on Population Projection and Education data
76+
6577
------------
6678

6779
# Bootcamps

0 commit comments

Comments
 (0)