coal-Data-Consumption-R-project


title: "Week 9 Coal Data Consumption"


# Set the knitr chunk default so that each chunk's source code is echoed
# alongside its output.
knitr::opts_chunk$set(echo = TRUE)

Reading the dataset

# Read the raw CSV into a data frame.
# NOTE(review): this is an absolute path on one user's machine; the script
# will not run elsewhere until the path is adjusted.
coalConsumptionData <- read.csv(
  file = "C:/Users/sudhi/Downloads/BIS_581/CoalConsumption_R_Project/annual-coal-consumption-by-country-1980-2009-2.csv"
)
# Preview the first eight rows.
head(coalConsumptionData, 8)

Rename the first column to "Region"

# Replace the name of the first column with "Region", then preview the
# first eight rows to confirm the rename.
names(coalConsumptionData)[1L] <- "Region"
head(coalConsumptionData, 8)

Display summary statistics of the dataset

# Print per-column summary statistics for the data frame as loaded.
summary(coalConsumptionData)

Loading the required libraries

Load the library for pivot_longer and pivot_wider functions.

library(tidyverse)  
library(dplyr)

Transforming the dataset into a long format using pivot_longer

# Reshape to long format: every column except Region is folded into a
# Year (former column name) / Consumption (cell value) pair.
coalLongData <- coalConsumptionData %>%
  pivot_longer(
    cols = !Region,
    names_to = "Year",
    values_to = "Consumption"
  )
# Print the reshaped table.
coalLongData

The 'Year' values are text with an "X" prefix (e.g. "X1980"), so the column is character rather than numeric

Utilizing transform and gsub to remove "X" from the 'Year' column

# Strip the "X" prefix from the Year values so they can later be parsed as
# numbers. The pattern is anchored with "^" and applied once (sub, not gsub)
# so that only a leading "X" is removed, never an "X" elsewhere in a value.
coalLongData <- transform(coalLongData, Year = sub("^X", "", Year))
# Preview the first eight rows to confirm the prefix is gone.
head(coalLongData, n = 8)

Converting 'Year' to a numeric format

# Year holds character strings at this point, so it can be parsed to numeric
# directly; then confirm the conversion.
coalLongData[["Year"]] <- as.numeric(coalLongData[["Year"]])
is.numeric(coalLongData[["Year"]])

Converting 'Consumption' from character to numeric

Checking if 'Consumption' is numeric

# TRUE if the Consumption column is stored as a numeric vector.
is.numeric(coalLongData[["Consumption"]])

Checking if 'Consumption' is in character format

# TRUE if the Consumption column is stored as a character vector.
is.character(coalLongData[["Consumption"]])

Converting 'Consumption' to numeric

# Coerce Consumption to numeric. Entries that cannot be parsed as numbers
# become NA (as.numeric emits a coercion warning when that happens).
coalLongData[["Consumption"]] <- as.numeric(coalLongData[["Consumption"]])
# Confirm the new type and review the column summaries.
is.numeric(coalLongData[["Consumption"]])
summary(coalLongData)
# View(coalLongData)

Reviewing the class types for all columns

# Report the class of each column, followed by the column summaries.
sapply(X = coalLongData, FUN = class)
summary(object = coalLongData)

Removing NA values

# Inspect the long-format data and count the missing cells before cleaning.
head(coalLongData, n = 10)
sum(is.na(coalLongData))
summary(coalLongData)

# Drop every row that contains a missing value. Previously this step only
# copied the data, so NA values leaked into the later aggregations.
processedCoalData <- tidyr::drop_na(coalLongData)

# Confirm that no missing values remain.
sum(is.na(processedCoalData))

Separating continents, regions, and countries for further analysis

# Region values that denote continent-level or aggregate rows rather than
# individual countries. "Central African Republic" is a country, so it is
# deliberately not listed here; it belongs with the country-level rows.
continentLabels <- c(
  "Africa", "Asia", "Europe", "North America", "Central & South America",
  "Former U.S.S.R.", "Middle East", "Asia & Oceania", "Antarctica"
)

Using filtering to split the data into continent-level rows, other regions, and individual countries

# Keep only the rows whose Region is one of the continent-level labels.
continentData <- filter(processedCoalData, Region %in% continentLabels)
head(continentData, n = 10)

# View() opens a data viewer, which is only meaningful in an interactive
# session; skip it when the script is rendered or run in batch mode.
if (interactive()) {
  View(continentData)
}

# Everything that is not a continent-level row.
otherRegionData <- filter(processedCoalData, !(Region %in% continentLabels))
head(otherRegionData, n = 10)
# Remove the "World" rows from that remainder to get the country-level rows.
countrySpecificData <- subset(otherRegionData, !(Region %in% "World"))
head(countrySpecificData, n = 10)

library(ggplot2)

# Line chart of consumption over time, one coloured line per region.
# Line thickness is set with `linewidth`; using `size` for lines is
# deprecated since ggplot2 3.4.0.
ggplot(continentData, aes(x = as.numeric(Year), y = Consumption, color = Region)) +
  geom_line(linewidth = 1.2, alpha = 0.8, linetype = "solid") +
  labs(
    title = "Coal Consumption Over Years by Region",
    x = "Year",
    y = "Consumption"
  ) +
  theme_minimal()

Faceted histograms by region

# Histogram of the Consumption values (10 bins), one panel per region.
ggplot(data = continentData, mapping = aes(x = Consumption)) +
  geom_histogram(bins = 10) +
  facet_wrap(vars(Region)) +
  ggtitle("Distribution of Coal Consumption by Region") +
  xlab("Consumption") +
  ylab("Frequency") +
  theme_minimal()

Stacked bar chart of total consumption by year

# Sum Consumption for every Year/Region combination.
totalConsumptionByYear <- aggregate(
  Consumption ~ Year + Region,
  data = continentData,
  FUN = sum
)

# Stacked bars: one bar per year, segments filled by region.
ggplot(totalConsumptionByYear) +
  geom_col(aes(x = as.factor(Year), y = Consumption, fill = Region)) +
  ggtitle("Total Coal Consumption by Year") +
  xlab("Year") +
  ylab("Total Consumption") +
  theme_minimal()

Violin Plot

# Violin plot of Consumption per year; the fill is mapped to the year as a
# factor, and trim = FALSE keeps the violin tails untrimmed.
ggplot(
  data = continentData,
  mapping = aes(x = as.numeric(Year), y = Consumption, fill = as.factor(Year))
) +
  geom_violin(trim = FALSE) +
  ggtitle("Violin Plot of Coal Consumption by Year") +
  xlab("Year") +
  ylab("Consumption") +
  theme_minimal()

Scatter plot

# Scatter plot of Consumption against Year, points coloured by region.
ggplot(data = continentData) +
  geom_point(
    mapping = aes(x = as.numeric(Year), y = Consumption, color = Region)
  ) +
  ggtitle("Scatter Plot of Coal Consumption Over Years") +
  xlab("Year") +
  ylab("Consumption") +
  theme_minimal()

Aggregate total consumption by region

library(ggplot2)
library(dplyr)

# Total consumption per region across all years, largest first.
# na.rm = TRUE keeps a single missing year from turning a region's whole
# total into NA, which would drop that region from the chart.
totalConsumptionByRegion <- continentData %>%
  group_by(Region) %>%
  summarise(TotalConsumption = sum(Consumption, na.rm = TRUE)) %>%
  arrange(desc(TotalConsumption))

# Bar chart of the per-region totals, bars ordered from largest to smallest,
# with the x-axis labels rotated 45 degrees.
ggplot(
  data = totalConsumptionByRegion,
  mapping = aes(x = reorder(Region, -TotalConsumption), y = TotalConsumption)
) +
  geom_col(fill = "violet") +
  ggtitle("Total Consumption by Region") +
  xlab("Region") +
  ylab("Total Consumption") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))