Skip to content

Lab is solved. AM #1

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
246 changes: 246 additions & 0 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -0,0 +1,246 @@
#Bar plot
library(ggplot2)
library(scales)
ggplot(sales_by_category, aes(x = Category, y = Total_Sales, fill = Category)) +
geom_bar(stat = "identity") +
labs(title = "Total Sales by Category",
x = "Category",
y = "Total Sales") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)) +
scale_y_continuous(labels = comma)
super_store <- read.csv("dataset/Sample - Superstore.csv")
head(super_store)
summary(super_store)
str(super_store)
nrow(super_store)
ncol(super_store)
colSums(is.na(super_store))
library(dplyr)
super_store1 <- super_store %>% select("Order.ID", "Order.Date", "Customer.Name", "Sales", "Profit")
filtered_profit <- super_store %>%
filter(Profit > 100)
sorted_by_sales <- super_store %>%
arrange(desc(Sales))
colSums(is.na(super_store))
#Creating a function to get the mode
library(DescTools)
super_store <- super_store %>%
mutate(Postal.Code = ifelse(is.na(Postal.Code), Mode(Age, na.rm = TRUE), Postal.Code))
super_store <- super_store %>%
filter(!is.na(Customer.Name))
super_store <- super_store %>%
mutate(Profit_Margin= Profit/Sales)
head(super_store)
library(lubridate)
super_store <- super_store %>%
mutate(Order.Date = mdy(Order.Date), # Convert to Date type
Order_Year = year(Order.Date)) # Extract the year from Order.Date
head(super_store)
category_summary <- super_store %>%
group_by(Category) %>%
summarise(
Total_Sales=sum(Sales),
Total_Profit=sum(Profit)
)
print(category_summary)
average_profit_margin <- super_store %>%
group_by(Region) %>%
summarise(
Average_Profit_Margin= Mean(Profit_Margin)
)
print(average_profit_margin)
number_orders_by_segment <- super_store %>%
group_by(Segment) %>%
summarise(Order_count= n())
print(number_orders_by_segment)
#Checking for duplicate rows
duplicated_rows <- super_store[duplicated(super_store$Order.ID), ]
nrow(duplicated_rows)
#Removing duplicate rows
super_store_unique <- super_store %>%
distinct(Order.ID, .Keep_all= TRUE)
nrow(super_store_unique)
# Check unique regions in super_store
unique(super_store$Region)
# Create a dummy dataset to join
population_data <- data.frame(
Region = c("South","West","Central","East"),
Population = c(1000000, 1500000, 2000000, 2500000)
)
print(population_data)
# Join with the Super_Store dataset
super_store <- super_store %>%
left_join(population_data, by = "Region")
# View the updated dataset
head(super_store)
#Total sales by category
sales_by_category <- super_store %>%
group_by(Category) %>%
summarise(
Total_Sales= sum(Sales)
)
head(sales_by_category)
#Bar plot
library(ggplot2)
library(scales)
ggplot(sales_by_category, aes(x = Category, y = Total_Sales, fill = Category)) +
geom_bar(stat = "identity") +
labs(title = "Total Sales by Category",
x = "Category",
y = "Total Sales") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)) +
scale_y_continuous(labels = comma)
#Histogram of Profit Margin
ggplot(data = super_store, aes(x = Profit_Margin)) +
geom_histogram(binwidth = 0.05, fill = "steelblue", color = "black") +
labs(title = "Distribution of Profit Margin",
x = "Profit Margin",
y = "Frequency") +
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
ggplot(sales_data, aes(x = Sales, y = Profit)) +
geom_point(color = "blue", size = 2) +
geom_smooth(method = "lm", color = "red", se = TRUE) +
labs(title = "Scatter Plot of Sales vs. Profit with Trend Line",
x = "Sales",
y = "Profit") +
theme_minimal()
ggplot(super_store, aes(x = Sales, y = Profit)) +
geom_point(color = "blue", size = 2) +
geom_smooth(method = "lm", color = "red", se = TRUE) +
labs(title = "Scatter Plot of Sales vs. Profit with Trend Line",
x = "Sales",
y = "Profit") +
theme_minimal()
ggplot(super_store, aes(x = Sales, y = Profit)) +
geom_point(color = "blue", size = 2) +
geom_smooth(method = "lm", color = "red", se = TRUE) +
labs(title = "Scatter Plot of Sales vs. Profit with Trend Line",
x = "Sales",
y = "Profit") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
ggplot(super_store, aes(x = Sales, y = Profit)) +
geom_point(color = "blue", size = 2) +
geom_smooth(method = "lm", color = "red", se = TRUE, show.legend = FALSE) +
labs(title = "Scatter Plot of Sales vs. Profit with Trend Line",
x = "Sales",
y = "Profit") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
ggplot(super_store, aes(x = Sales, y = Profit)) +
geom_point(color = "blue", size = 2) +
geom_smooth(method = "lm", color = "red", se = TRUE) +
labs(title = "Scatter Plot of Sales vs. Profit with Trend Line",
x = "Sales",
y = "Profit") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
#Replace missing values with Mode
library(DescTools)
super_store <- super_store %>%
mutate(Postal.Code = ifelse(is.na(Postal.Code), Mode(Age, na.rm = TRUE), Postal.Code))
super_store <- read.csv("dataset/Sample - Superstore.csv")
head(super_store)
summary(super_store)
str(super_store)
nrow(super_store)
ncol(super_store)
colSums(is.na(super_store))
library(dplyr)
super_store1 <- super_store %>% select("Order.ID", "Order.Date", "Customer.Name", "Sales", "Profit")
filtered_profit <- super_store %>%
filter(Profit > 100)
sorted_by_sales <- super_store %>%
arrange(desc(Sales))
colSums(is.na(super_store))
#Replace missing values with Mode
library(DescTools)
super_store <- super_store %>%
mutate(Postal.Code = ifelse(is.na(Postal.Code), Mode(Age, na.rm = TRUE), Postal.Code))
super_store <- super_store %>%
filter(!is.na(Customer.Name))
super_store <- super_store %>%
mutate(Profit_Margin= Profit/Sales)
head(super_store)
library(lubridate)
super_store <- super_store %>%
mutate(Order.Date = mdy(Order.Date), # Convert to Date type
Order_Year = year(Order.Date)) # Extract the year from Order.Date
head(super_store)
category_summary <- super_store %>%
group_by(Category) %>%
summarise(
Total_Sales=sum(Sales),
Total_Profit=sum(Profit)
)
print(category_summary)
average_profit_margin <- super_store %>%
group_by(Region) %>%
summarise(
Average_Profit_Margin= Mean(Profit_Margin)
)
print(average_profit_margin)
number_orders_by_segment <- super_store %>%
group_by(Segment) %>%
summarise(Order_count= n())
print(number_orders_by_segment)
#Checking for duplicate rows
duplicated_rows <- super_store[duplicated(super_store$Order.ID), ]
nrow(duplicated_rows)
#Removing duplicate rows
super_store_unique <- super_store %>%
distinct(Order.ID, .Keep_all= TRUE)
nrow(super_store_unique)
# Check unique regions in super_store
unique(super_store$Region)
# Create a dummy dataset to join
population_data <- data.frame(
Region = c("South","West","Central","East"),
Population = c(1000000, 1500000, 2000000, 2500000)
)
print(population_data)
# Join with the Super_Store dataset
super_store <- super_store %>%
left_join(population_data, by = "Region")
# View the updated dataset
head(super_store)
#Total sales by category
sales_by_category <- super_store %>%
group_by(Category) %>%
summarise(
Total_Sales= sum(Sales)
)
head(sales_by_category)
#Bar plot
library(ggplot2)
library(scales)
ggplot(sales_by_category, aes(x = Category, y = Total_Sales, fill = Category)) +
geom_bar(stat = "identity") +
labs(title = "Total Sales by Category",
x = "Category",
y = "Total Sales") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(hjust = 0.5)) +
scale_y_continuous(labels = comma)
ggplot(super_store, aes(x = Sales, y = Profit)) +
geom_point(color = "blue", size = 2) +
geom_smooth(method = "lm", color = "red", se = TRUE) +
labs(title = "Scatter Plot of Sales vs. Profit with Trend Line",
x = "Sales",
y = "Profit") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
#Histogram of Profit Margin
ggplot(data = super_store, aes(x = Profit_Margin)) +
geom_histogram(binwidth = 0.05, fill = "steelblue", color = "black") +
labs(title = "Distribution of Profit Margin",
x = "Profit Margin",
y = "Frequency") +
theme_minimal()+
theme(plot.title = element_text(hjust = 0.5))
Loading