diff --git a/.Rhistory b/.Rhistory new file mode 100644 index 0000000..8b151a2 --- /dev/null +++ b/.Rhistory @@ -0,0 +1 @@ +library(tidyverse) diff --git a/Introduction to R and Tidyverse.Rmd b/Introduction to R and Tidyverse.Rmd new file mode 100644 index 0000000..ba84ac7 --- /dev/null +++ b/Introduction to R and Tidyverse.Rmd @@ -0,0 +1,120 @@ +--- +title: "Lab | Introduction to R and Tidyverse" +output: html_notebook +--- + +1. **Install and load the Tidyverse**: +If you haven't already, install the tidyverse package and load it into your R session. + +```{r} +install.packages("tidyverse") +library(tidyverse) +``` + +2.**Create a data frame:** +Use the following code to create a sample data frame for this exercise. + +```{r} +employees <- data.frame( + ID = 1:6, + Name = c("Alice", "Bob", "Charlie", "David", "Eve", "Frank"), + Age = c(25, 30, 35, 40, 45, 50), + Department = c("HR", "IT", "Finance", "IT", "HR", "Finance"), + Salary = c(50000, 60000, 70000, 80000, 55000, 75000) + ) +``` +This data frame contains information about employees, including their ID, name, age, department, and salary. + +### Task 1: Explore the Data +1. Print the employees data frame to the console. +```{r} +print(employees) +``` + +2. Use the str() function to inspect the structure of the data frame. +```{r} +str(employees) +``` +3. Use the summary() function to get a summary of the data. +```{r} +summary(employees) +``` +### Task 2: Basic Data Manipulation with dplyr + +1. Filter rows: Create a new data frame that includes only employees who work in the "IT" department. +```{r} +it_employees <- employees %>% filter(Department == "IT") +print(it_employees) +``` + +2. Select columns: Create a new data frame that includes only the Name and Salary columns. +```{r} +name_salary_df<-employees %>% select(Name,Salary) +print(name_salary_df) +``` + +3. Add a new column: Add a new column called Bonus that calculates a 10% bonus for each employee based on their salary. 
+```{r} +mutated_df<- employees %>% mutate(Bonus=Salary*0.10) +print(mutated_df) +``` + +4. Sort rows: Sort the employees data frame by Salary in descending order. +```{r} +employees_sorted<-employees %>% arrange(desc(Salary)) +print(employees_sorted) +``` + +5. Summarize data: Calculate the average salary for each department. +```{r} +average_salary_by_dept<- employees %>% +group_by(Department) %>% +summarize(Average_Salary=mean(Salary)) +print(average_salary_by_dept) +``` +## Optional Exercises + +### Extra 1: Advanced Data Manipulation + +1. Group and summarize: Group the data by Department and calculate the total salary expenditure for each department. +```{r} +total_salary_exp_by_dept <- employees %>% +group_by(Department) %>% +summarize(Total_Salary = sum(Salary)) +print(total_salary_exp_by_dept) +``` + +2. Filter and mutate: Create a new data frame that includes only employees older than 30 and adds a column called Experience that assumes each employee has Age - 25 years of experience. +```{r} +employees_30_above_df <- employees %>% +filter(Age>30) %>% +mutate(Experience=Age-25) +print(employees_30_above_df) +``` +### Extra 2: Challenge + +1. Combine operations: Create a new data frame that includes employees from the "HR" department, adds a Bonus column (10% of salary), and sorts the data by Bonus in descending order. +```{r} +HR_dept_df <- employees %>% + filter(Department=="HR") %>% + mutate(Bonus=Salary*0.10) %>% + arrange(desc(Bonus)) +print(HR_dept_df) +``` + +2. Visualize data: Use ggplot2 to create a bar plot showing the total salary expenditure by department. 
+```{r} +library(ggplot2) +total_salary_by_dept <- employees %>% + group_by(Department) %>% + summarize(TotalSalary =sum(Salary)) + + ggplot(total_salary_by_dept, aes(x=Department, y=TotalSalary, fill=Department))+ + geom_bar(stat='identity') + + labs(title='Total Salary Expenditure by Department', + x='Departmnent', + y='Total Salary Expenditure') + + theme_minimal() +``` + + diff --git a/Introduction to R and Tidyverse.nb.html b/Introduction to R and Tidyverse.nb.html new file mode 100644 index 0000000..5417c43 --- /dev/null +++ b/Introduction to R and Tidyverse.nb.html @@ -0,0 +1,2174 @@ + + + + + + + + + + + + + +Lab | Introduction to R and Tidyverse + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + + + +
    +
  1. Install and load the Tidyverse: If you haven’t +already, install the tidyverse package and load it into your R +session.
  2. +
+ + + + +
if (!requireNamespace("tidyverse", quietly = TRUE)) install.packages("tidyverse")  # install only when missing; re-installing while the package is loaded fails with "Updating loaded packages"
+ + + +
Error in install.packages : Updating loaded packages
+ + + +
library(tidyverse)
+ + + +
── Attaching core tidyverse packages ─────────────────────────────────── tidyverse 2.0.0 ──
+✔ dplyr     1.1.4     ✔ readr     2.1.5
+✔ forcats   1.0.0     ✔ stringr   1.5.1
+✔ ggplot2   3.5.1     ✔ tibble    3.2.1
+✔ lubridate 1.9.4     ✔ tidyr     1.3.1
+✔ purrr     1.0.4     ── Conflicts ───────────────────────────────────────────────────── tidyverse_conflicts() ──
+✖ dplyr::filter() masks stats::filter()
+✖ dplyr::lag()    masks stats::lag()
+ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
+ + + +

2. Create a data frame: Use the following code to +create a sample data frame for this exercise.

+ + + + +
employees <- data.frame(
+  # Sequential integer identifiers, one per row.
+  ID = seq_len(6L),
+  Name = c("Alice", "Bob", "Charlie", "David", "Eve", "Frank"),
+  Age = c(25, 30, 35, 40, 45, 50),
+  # Three distinct departments, two employees in each.
+  Department = c("HR", "IT", "Finance", "IT", "HR", "Finance"),
+  # Salaries are listed in the same row order as Name.
+  Salary = c(50000, 60000, 70000, 80000, 55000, 75000)
+)
+ + + + +

This data frame contains information about employees, including their +ID, name, age, department, and salary.

+
+

Task 1: Explore the Data

+
    +
  1. Print the employees data frame to the console.
  2. +
+ + + + +
print(employees)  # show the whole employees data frame in the console
+ + + + +
+ +
+ + + + +
    +
  1. Use the str() function to inspect the structure of the data +frame.
  2. +
+ + + + +
str(employees)  # compact overview: dimensions, then each column's type and first values
+ + + +
'data.frame':   6 obs. of  5 variables:
+ $ ID        : int  1 2 3 4 5 6
+ $ Name      : chr  "Alice" "Bob" "Charlie" "David" ...
+ $ Age       : num  25 30 35 40 45 50
+ $ Department: chr  "HR" "IT" "Finance" "IT" ...
+ $ Salary    : num  50000 60000 70000 80000 55000 75000
+ + + +
    +
  1. Use the summary() function to get a summary of the data.
  2. +
+ + + + +
summary(employees)  # per-column summary: quartiles and mean for numeric columns, length/class/mode for character ones
+ + + +
       ID           Name                Age         Department            Salary     
+ Min.   :1.00   Length:6           Min.   :25.00   Length:6           Min.   :50000  
+ 1st Qu.:2.25   Class :character   1st Qu.:31.25   Class :character   1st Qu.:56250  
+ Median :3.50   Mode  :character   Median :37.50   Mode  :character   Median :65000  
+ Mean   :3.50                      Mean   :37.50                      Mean   :65000  
+ 3rd Qu.:4.75                      3rd Qu.:43.75                      3rd Qu.:73750  
+ Max.   :6.00                      Max.   :50.00                      Max.   :80000  
+ + + +
+
+

Task 2: Basic Data Manipulation with dplyr

+
    +
  1. Filter rows: Create a new data frame that includes only employees +who work in the “IT” department.
  2. +
+ + + + +
it_employees <- filter(
+  employees,
+  Department == "IT"  # exact, case-sensitive match on the department name
+)
+# Show the filtered rows in the console.
+print(it_employees)
+ + + + +
+ +
+ + + + +
    +
  1. Select columns: Create a new data frame that includes only the Name +and Salary columns.
  2. +
+ + + + +
name_salary_df <- employees %>%
+  # Keep only the two requested columns, in this order.
+  select(Name, Salary)
+print(name_salary_df)
+ + + + +
+ +
+ + + + +
    +
  1. Add a new column: Add a new column called Bonus that calculates a +10% bonus for each employee based on their salary.
  2. +
+ + + + +
mutated_df <- mutate(
+  employees,
+  Bonus = Salary * 0.10  # bonus is 10% of each employee's salary
+)
+print(mutated_df)
+ + + + +
+ +
+ + + + +
    +
  1. Sort rows: Sort the employees data frame by Salary in descending +order.
  2. +
+ + + + +
employees_sorted <- employees %>%
+  # desc() reverses the ordering so the highest salary comes first.
+  arrange(desc(Salary))
+print(employees_sorted)
+ + + + +
+ +
+ + + + +
    +
  1. Summarize data: Calculate the average salary for each +department.
  2. +
+ + + + +
average_salary_by_dept <- employees %>%
+  group_by(Department) %>%
+  # Collapse each department to a single row holding its mean salary.
+  summarize(Average_Salary = mean(Salary))
+print(average_salary_by_dept)
+ + + + +
+ +
+ + + + +
+
+

Optional Exercises

+
+

Extra 1: Advanced Data Manipulation

+
    +
  1. Group and summarize: Group the data by Department and calculate the +total salary expenditure for each department.
  2. +
+ + + + +
total_salary_exp_by_dept <- employees %>%
+  group_by(Department) %>%
+  # Collapse each department to a single row holding the sum of its salaries.
+  summarize(Total_Salary = sum(Salary))
+print(total_salary_exp_by_dept)
+ + + + +
+ +
+ + + + +
    +
  1. Filter and mutate: Create a new data frame that includes only +employees older than 30 and adds a column called Experience that assumes +each employee has Age - 25 years of experience.
  2. +
+ + + + +
employees_30_above_df <- employees %>%
+  # Strictly older than 30: an employee aged exactly 30 is excluded.
+  filter(Age > 30) %>%
+  # The exercise defines experience as Age minus 25.
+  mutate(Experience = Age - 25)
+print(employees_30_above_df)
+ + + + +
+ +
+ + + + +
+
+

Extra 2: Challenge

+
    +
  1. Combine operations: Create a new data frame that includes employees +from the “HR” department, adds a Bonus column (10% of salary), and sorts +the data by Bonus in descending order.
  2. +
+ + + + +
HR_dept_df <- filter(employees, Department == "HR")
+# Bonus is 10% of salary, added as a new column.
+HR_dept_df <- mutate(HR_dept_df, Bonus = Salary * 0.10)
+# Largest bonus first.
+HR_dept_df <- arrange(HR_dept_df, desc(Bonus))
+print(HR_dept_df)
+ + + + +
+ +
+ + + + +
    +
  1. Visualize data: Use ggplot2 to create a bar plot showing the total +salary expenditure by department.
  2. +
+ + + + +
library(ggplot2)
+
+# Total salary per department: one row per Department.
+total_salary_by_dept <- employees %>%
+  group_by(Department) %>%
+  summarize(TotalSalary = sum(Salary))
+
+# Bar chart of the totals. geom_col() takes bar heights directly from the y
+# values, so no stat = "identity" override is needed. The plot is a separate
+# statement, not a continuation of the pipeline above, so it starts at column 0.
+ggplot(
+  total_salary_by_dept,
+  aes(x = Department, y = TotalSalary, fill = Department)
+) +
+  geom_col() +
+  labs(
+    title = "Total Salary Expenditure by Department",
+    x = "Department",  # fixed axis-label typo ("Departmnent")
+    y = "Total Salary Expenditure"
+  ) +
+  theme_minimal()
+ + + + +

+ + + + + +
+
+ +
LS0tDQp0aXRsZTogIkxhYiB8IEludHJvZHVjdGlvbiB0byBSIGFuZCBUaWR5dmVyc2UiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoxLiAqKkluc3RhbGwgYW5kIGxvYWQgdGhlIFRpZHl2ZXJzZSoqOg0KSWYgeW91IGhhdmVuJ3QgYWxyZWFkeSwgaW5zdGFsbCB0aGUgdGlkeXZlcnNlIHBhY2thZ2UgYW5kIGxvYWQgaXQgaW50byB5b3VyIFIgc2Vzc2lvbi4NCg0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCJ0aWR5dmVyc2UiKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpgYGANCg0KMi4qKkNyZWF0ZSBhIGRhdGEgZnJhbWU6KiogDQpVc2UgdGhlIGZvbGxvd2luZyBjb2RlIHRvIGNyZWF0ZSBhIHNhbXBsZSBkYXRhIGZyYW1lIGZvciB0aGlzIGV4ZXJjaXNlLg0KDQpgYGB7cn0NCmVtcGxveWVlcyA8LSBkYXRhLmZyYW1lKA0KCQkgIElEID0gMTo2LA0KCQkgIE5hbWUgPSBjKCJBbGljZSIsICJCb2IiLCAiQ2hhcmxpZSIsICJEYXZpZCIsICJFdmUiLCAiRnJhbmsiKSwNCgkJICBBZ2UgPSBjKDI1LCAzMCwgMzUsIDQwLCA0NSwgNTApLA0KCQkgIERlcGFydG1lbnQgPSBjKCJIUiIsICJJVCIsICJGaW5hbmNlIiwgIklUIiwgIkhSIiwgIkZpbmFuY2UiKSwNCgkJICBTYWxhcnkgPSBjKDUwMDAwLCA2MDAwMCwgNzAwMDAsIDgwMDAwLCA1NTAwMCwgNzUwMDApDQoJCSkNCmBgYA0KVGhpcyBkYXRhIGZyYW1lIGNvbnRhaW5zIGluZm9ybWF0aW9uIGFib3V0IGVtcGxveWVlcywgaW5jbHVkaW5nIHRoZWlyIElELCBuYW1lLCBhZ2UsIGRlcGFydG1lbnQsIGFuZCBzYWxhcnkuDQoNCiMjIyBUYXNrIDE6IEV4cGxvcmUgdGhlIERhdGENCjEuIFByaW50IHRoZSBlbXBsb3llZXMgZGF0YSBmcmFtZSB0byB0aGUgY29uc29sZS4NCmBgYHtyfQ0KcHJpbnQoZW1wbG95ZWVzKQ0KYGBgDQoNCjIuIFVzZSB0aGUgc3RyKCkgZnVuY3Rpb24gdG8gaW5zcGVjdCB0aGUgc3RydWN0dXJlIG9mIHRoZSBkYXRhIGZyYW1lLg0KYGBge3J9DQpzdHIoZW1wbG95ZWVzKQ0KYGBgDQozLiBVc2UgdGhlIHN1bW1hcnkoKSBmdW5jdGlvbiB0byBnZXQgYSBzdW1tYXJ5IG9mIHRoZSBkYXRhLg0KYGBge3J9DQpzdW1tYXJ5KGVtcGxveWVlcykNCmBgYA0KIyMjIFRhc2sgMjogQmFzaWMgRGF0YSBNYW5pcHVsYXRpb24gd2l0aCBkcGx5cg0KDQoxLiBGaWx0ZXIgcm93czogQ3JlYXRlIGEgbmV3IGRhdGEgZnJhbWUgdGhhdCBpbmNsdWRlcyBvbmx5IGVtcGxveWVlcyB3aG8gd29yayBpbiB0aGUgIklUIiBkZXBhcnRtZW50Lg0KYGBge3J9DQppdF9lbXBsb3llZXMgPC0gZW1wbG95ZWVzICU+JSBmaWx0ZXIoRGVwYXJ0bWVudCA9PSAiSVQiKQ0KcHJpbnQoaXRfZW1wbG95ZWVzKQ0KYGBgDQoNCjIuIFNlbGVjdCBjb2x1bW5zOiBDcmVhdGUgYSBuZXcgZGF0YSBmcmFtZSB0aGF0IGluY2x1ZGVzIG9ubHkgdGhlIE5hbWUgYW5kIFNhbGFyeSBjb2x1bW5zLg0KYGBge3J9DQpuYW1lX3NhbGFyeV9kZjwtZW1wbG95ZWVzICU+JSBzZWxlY3QoTmFtZSxTYWxhcnkpDQpwcmludChuYW1l
X3NhbGFyeV9kZikNCmBgYA0KDQozLiBBZGQgYSBuZXcgY29sdW1uOiBBZGQgYSBuZXcgY29sdW1uIGNhbGxlZCBCb251cyB0aGF0IGNhbGN1bGF0ZXMgYSAxMCUgYm9udXMgZm9yIGVhY2ggZW1wbG95ZWUgYmFzZWQgb24gdGhlaXIgc2FsYXJ5Lg0KYGBge3J9DQptdXRhdGVkX2RmPC0gZW1wbG95ZWVzICU+JSBtdXRhdGUoQm9udXM9U2FsYXJ5KjAuMTApDQpwcmludChtdXRhdGVkX2RmKQ0KYGBgDQoNCjQuIFNvcnQgcm93czogU29ydCB0aGUgZW1wbG95ZWVzIGRhdGEgZnJhbWUgYnkgU2FsYXJ5IGluIGRlc2NlbmRpbmcgb3JkZXIuDQpgYGB7cn0NCmVtcGxveWVlc19zb3J0ZWQ8LWVtcGxveWVlcyAlPiUgYXJyYW5nZShkZXNjKFNhbGFyeSkpDQpwcmludChlbXBsb3llZXNfc29ydGVkKQ0KYGBgDQoNCjUuIFN1bW1hcml6ZSBkYXRhOiBDYWxjdWxhdGUgdGhlIGF2ZXJhZ2Ugc2FsYXJ5IGZvciBlYWNoIGRlcGFydG1lbnQuDQpgYGB7cn0NCmF2ZXJhZ2Vfc2FsYXJ5X2J5X2RlcHQ8LSBlbXBsb3llZXMgJT4lIA0KZ3JvdXBfYnkoRGVwYXJ0bWVudCkgJT4lDQpzdW1tYXJpemUoQXZlcmFnZV9TYWxhcnk9bWVhbihTYWxhcnkpKQ0KcHJpbnQoYXZlcmFnZV9zYWxhcnlfYnlfZGVwdCkNCmBgYA0KIyMgT3B0aW9uYWwgRXhlcmNpc2VzDQoNCiMjIyBFeHRyYSAxOiBBZHZhbmNlZCBEYXRhIE1hbmlwdWxhdGlvbg0KDQoxLiBHcm91cCBhbmQgc3VtbWFyaXplOiBHcm91cCB0aGUgZGF0YSBieSBEZXBhcnRtZW50IGFuZCBjYWxjdWxhdGUgdGhlIHRvdGFsIHNhbGFyeSBleHBlbmRpdHVyZSBmb3IgZWFjaCBkZXBhcnRtZW50Lg0KYGBge3J9DQp0b3RhbF9zYWxhcnlfZXhwX2J5X2RlcHQgPC0gZW1wbG95ZWVzICU+JQ0KZ3JvdXBfYnkoRGVwYXJ0bWVudCkgJT4lDQpzdW1tYXJpemUoVG90YWxfU2FsYXJ5ID0gc3VtKFNhbGFyeSkpDQpwcmludCh0b3RhbF9zYWxhcnlfZXhwX2J5X2RlcHQpDQpgYGANCg0KMi4gRmlsdGVyIGFuZCBtdXRhdGU6IENyZWF0ZSBhIG5ldyBkYXRhIGZyYW1lIHRoYXQgaW5jbHVkZXMgb25seSBlbXBsb3llZXMgb2xkZXIgdGhhbiAzMCBhbmQgYWRkcyBhIGNvbHVtbiBjYWxsZWQgRXhwZXJpZW5jZSB0aGF0IGFzc3VtZXMgZWFjaCBlbXBsb3llZSBoYXMgQWdlIC0gMjUgeWVhcnMgb2YgZXhwZXJpZW5jZS4NCmBgYHtyfQ0KZW1wbG95ZWVzXzMwX2Fib3ZlX2RmIDwtIGVtcGxveWVlcyAlPiUNCmZpbHRlcihBZ2U+MzApICU+JQ0KbXV0YXRlKEV4cGVyaWVuY2U9QWdlLTI1KQ0KcHJpbnQoZW1wbG95ZWVzXzMwX2Fib3ZlX2RmKQ0KYGBgDQojIyMgRXh0cmEgMjogQ2hhbGxlbmdlDQoNCjEuIENvbWJpbmUgb3BlcmF0aW9uczogQ3JlYXRlIGEgbmV3IGRhdGEgZnJhbWUgdGhhdCBpbmNsdWRlcyBlbXBsb3llZXMgZnJvbSB0aGUgIkhSIiBkZXBhcnRtZW50LCBhZGRzIGEgQm9udXMgY29sdW1uICgxMCUgb2Ygc2FsYXJ5KSwgYW5kIHNvcnRzIHRoZSBkYXRhIGJ5IEJvbnVzIGluIGRlc2NlbmRpbmcgb3JkZXIuDQpgYGB7cn0NCkhS
X2RlcHRfZGYgPC0gZW1wbG95ZWVzICU+JQ0KICBmaWx0ZXIoRGVwYXJ0bWVudD09IkhSIikgJT4lDQogIG11dGF0ZShCb251cz1TYWxhcnkqMC4xMCkgJT4lDQogIGFycmFuZ2UoZGVzYyhCb251cykpDQpwcmludChIUl9kZXB0X2RmKQ0KYGBgDQoNCjIuIFZpc3VhbGl6ZSBkYXRhOiBVc2UgZ2dwbG90MiB0byBjcmVhdGUgYSBiYXIgcGxvdCBzaG93aW5nIHRoZSB0b3RhbCBzYWxhcnkgZXhwZW5kaXR1cmUgYnkgZGVwYXJ0bWVudC4NCmBgYHtyfQ0KbGlicmFyeShnZ3Bsb3QyKQ0KdG90YWxfc2FsYXJ5X2J5X2RlcHQgPC0gZW1wbG95ZWVzICU+JQ0KICBncm91cF9ieShEZXBhcnRtZW50KSAlPiUNCiAgc3VtbWFyaXplKFRvdGFsU2FsYXJ5ID1zdW0oU2FsYXJ5KSkNCiAgICAgICAgICAgIA0KICBnZ3Bsb3QodG90YWxfc2FsYXJ5X2J5X2RlcHQsIGFlcyh4PURlcGFydG1lbnQsIHk9VG90YWxTYWxhcnksIGZpbGw9RGVwYXJ0bWVudCkpKw0KICAgIGdlb21fYmFyKHN0YXQ9J2lkZW50aXR5JykgKw0KICAgIGxhYnModGl0bGU9J1RvdGFsIFNhbGFyeSBFeHBlbmRpdHVyZSBieSBEZXBhcnRtZW50JywNCiAgICAgICAgIHg9J0RlcGFydG1uZW50JywNCiAgICAgICAgIHk9J1RvdGFsIFNhbGFyeSBFeHBlbmRpdHVyZScpICsNCiAgICB0aGVtZV9taW5pbWFsKCkNCmBgYA0KDQoNCg==
+ + + +
+ + + + + + + + + + + + + + + +