%>%
, |>
)&
)|
)%in%
operatorhr96-21.csv
data
within your R project
folder.hr96-21.csv
file inside the
data
folder.read_csv()
function
included within the {tidyverse} package.hr
looks like this:arrange()
hr
, you can display candidates among
the winners (wl > 0
) with the lowest number of votes
(vote
).# A tibble: 3,659 × 8
ku kun seito name wl gender rank vote
<chr> <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl>
1 tokyo 22 社民 HOSAKA, NOBUTO 2 male 5 13904
2 kyoto 3 日本維新の会 MORI, NATSUE 2 female 4 16511
3 saitama 6 社民 FUKADA, HAJIME 2 male 4 17909
4 saitama 8 共産 SHIOKAWA, TETSUYA 2 male 4 18512
5 nara 1 民主 IENISHI, SATORU 2 male 4 18994
6 okinawa 1 共産 AKAMINE, SEIKEN 2 male 4 19528
7 kochi 3 共産 HARUNA, NAOAKI 2 male 3 19549
8 kyoto 5 希望 INOUE, KAZUNORI 2 male 4 19586
9 tokushima 1 維新 YOSHIDA, TOMOYO 2 female 3 20065
10 ehime 2 維新 YOKOYAMA, HIROYUKI 2 male 3 22677
# ℹ 3,649 more rows
is.na()
is.na()
function.# A tibble: 181 × 3
year name exp
<dbl> <chr> <dbl>
1 1996 ITO, MASAKO NA
2 1996 YAMADA, HIROSHI NA
3 1996 ASANO, KOSETSU NA
4 1996 ISHIKAWA, KAZUMI NA
5 1996 MURAMATSU, YOICHI NA
6 1996 YAMAZAKI, YOSHIAKI NA
7 1996 NAKANO, YOKO NA
8 1996 OGAWA, OSAMU NA
9 1996 ITO, TADAHIKO NA
10 1996 ITO, TAKAYOSHI NA
# ℹ 171 more rows
exp
.hr
.[1] 9660 22
exp
. exp
here, we use the negation symbol !
.# A tibble: 9,479 × 3
year name exp
<dbl> <chr> <dbl>
1 1996 KAWAMURA, TAKASHI 9828097
2 1996 IMAEDA, NORIO 9311555
3 1996 SATO, TAISUKE 9231284
4 1996 IWANAKA, MIHOKO 2177203
5 1996 AOKI, HIROYUKI 12940178
6 1996 TANABE, HIROO 16512426
7 1996 FURUKAWA, MOTOHISA 11435567
8 1996 ISHIYAMA, JUNICHI 2128510
9 1996 FUJIWARA, MICHIKO 3270533
10 1996 YOSHIDA, YUKIHIRO 11245219
# ℹ 9,469 more rows
exp
is not
missing.arrange()
arrange()
function.wl > 0
) from hr
and display the winning
candidates with the fewest votes (vote
) in ascending
order.hr %>%
filter(wl > 0) %>%
arrange(vote) %>%
select(year, ku, kun, seito, name, wl, gender, rank, vote)
# A tibble: 3,659 × 9
year ku kun seito name wl gender rank vote
<dbl> <chr> <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl>
1 1996 tokyo 22 社民 HOSAKA, NOBUTO 2 male 5 13904
2 2017 kyoto 3 日本維新の会 MORI, NATSUE 2 female 4 16511
3 1996 saitama 6 社民 FUKADA, HAJIME 2 male 4 17909
4 2003 saitama 8 共産 SHIOKAWA, TETSUYA 2 male 4 18512
5 1996 nara 1 民主 IENISHI, SATORU 2 male 4 18994
6 2003 okinawa 1 共産 AKAMINE, SEIKEN 2 male 4 19528
7 1996 kochi 3 共産 HARUNA, NAOAKI 2 male 3 19549
8 2017 kyoto 5 希望 INOUE, KAZUNORI 2 male 4 19586
9 2021 tokushima 1 維新 YOSHIDA, TOMOYO 2 female 3 20065
10 2014 ehime 2 維新 YOKOYAMA, HIROYU… 2 male 3 22677
# ℹ 3,649 more rows
wl == 1
.hr %>%
filter(wl == 1) %>%
arrange(vote) %>%
select(year, ku, kun, seito, name, wl, gender, rank, vote)
# A tibble: 2,674 × 9
year ku kun seito name wl gender rank vote
<dbl> <chr> <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl>
1 1996 kochi 1 共産 YAMAHARA, KENJIRO 1 male 1 33523
2 2000 kochi 1 自民 FUKUI, TERU 1 male 1 40765
3 2000 osaka 17 自民 OKASHITA, NOBUKO 1 female 1 41781
4 1996 kyoto 2 自民 OKUDA, MIKIO 1 male 1 43060
5 2003 kochi 1 自民 FUKUI, TERU 1 male 1 43232
6 2012 kochi 1 自民 FUKUI, TERU 1 male 1 44027
7 2009 kochi 1 自民 FUKUI, TERU 1 male 1 44068
8 1996 kanagawa 4 自民 IIJIMA, TADAYOSHI 1 male 1 46389
9 1996 tokushima 1 民主 SENGOKU, YOSHITO 1 male 1 47057
10 1996 fukui 1 新進 SASAKI, RYUZO 1 male 1 48214
# ℹ 2,664 more rows
arrange(desc())
desc()
function# A tibble: 9,660 × 9
year ku kun seito name wl gender rank vote
<dbl> <chr> <dbl> <chr> <chr> <dbl> <chr> <dbl> <dbl>
1 2021 kanagawa 15 自民 KONO, TARO 1 male 1 210515
2 2009 hokkaido 9 民主 HATOYAMA, YUKIO 1 male 1 201461
3 2009 shizuoka 6 民主 WATANABE, SHU 1 male 1 197688
4 2005 kanagawa 11 自民 KOIZUMI, JUNICHIRO 1 male 1 197037
5 2012 kanagawa 15 自民 KONO, TARO 1 male 1 192604
6 2009 saitama 6 民主 OSHIMA, ATSUHI 1 male 1 186993
7 2005 kanagawa 15 自民 KONO, TARO 1 male 1 186770
8 2009 hokkaido 3 民主 ARAI, SATOSHI 1 male 1 186081
9 2012 kanagawa 11 自民 KOIZUMI, SHINJIRO 1 male 1 184360
10 2009 shizuoka 5 民主 HOSONO, GOSHI 1 male 1 184328
# ℹ 9,650 more rows
DT::datatable()
function.bind_rows()
bind_rows()
.df1
and df2
# Example inputs for bind_rows(): two data frames with the same three columns
# (id, name, score) but different ids.
df1 <- data.frame(
  id = 1:5,
  name = LETTERS[1:5],
  score = seq(100, 60, by = -10)
)
df2 <- data.frame(
  id = 6:8,
  name = LETTERS[6:8],
  score = seq(50, 30, by = -10)
)
df1
and df2
look like id name score
1 1 A 100
2 2 B 90
3 3 C 80
4 4 D 70
5 5 E 60
id name score
1 6 F 50
2 7 G 40
3 8 H 30
name
and score
.bind_rows()
function.id
, name
, and
score
match. id name score
1 1 A 100
2 2 B 90
3 3 C 80
4 4 D 70
5 5 E 60
6 6 F 50
7 7 G 40
8 8 H 30
list()
Functiondf1
and df2
represent student
data for the 1st and 2nd-year seminars, respectively.bind_rows()
to combine them vertically,
you lose information about the students’ academic year.list()
function..id argument
. year id name score
1 freshman 1 A 100
2 freshman 2 B 90
3 freshman 3 C 80
4 freshman 4 D 70
5 freshman 5 E 60
6 sophomore 6 F 50
7 sophomore 7 G 40
8 sophomore 8 H 30
univ
).df1 <- data.frame(univ = c("拓殖大学", "早稲田大学", "UCLA"),
city = c("東京", "東京", "LA"),
pop = c(8600, 47000, 45000))
df2 <- data.frame(univ = c("拓殖大学", "早稲田大学", "UCLA"),
color = c("オレンジ", "えんじ", "黄色と青"))
left_join()
right_join()
inner_join()
full_join()
by = "key variable name"
.left_join()
functionleft_join(x, y)
is a function that preserves x.left_join(x, y)
, UCLA in x is prioritized and
retained.
# Two small tables sharing the key column `univ`; the third university differs
# between x and y, so it has no partner row in the other table.
x <- data.frame(
  univ = c("拓殖大学", "早稲田大学", "UCLA"),
  pop = c(8600, 47000, 45000)
)
y <- data.frame(
  univ = c("拓殖大学", "早稲田大学", "東北大学"),
  founder = c("桂太郎", "大隈重信", "日本国")
)
univ pop founder
1 拓殖大学 8600 桂太郎
2 早稲田大学 47000 大隈重信
3 UCLA 45000 <NA>
right_join()
functionright_join(x, y)
is a function that preserves data
frame y.right_join(x, y)
, 東北大学 in y is prioritized
and retained.
# Inputs for the right_join() example: x lists `pop` before the key column
# `univ`; y pairs `univ` with `founder`. The third university differs.
x <- data.frame(
  pop = c(8600, 47000, 45000),
  univ = c("拓殖大学", "早稲田大学", "UCLA")
)
y <- data.frame(
  univ = c("拓殖大学", "早稲田大学", "東北大学"),
  founder = c("桂太郎", "大隈重信", "日本国")
)
pop univ founder
1 8600 拓殖大学 桂太郎
2 47000 早稲田大学 大隈重信
3 NA 東北大学 日本国
inner_join()
function.inner_join(x, y)
merges only the rows that exist
simultaneously in both x and y datasets.
# Inputs for the inner_join() example: only the first two universities occur
# in both x and y.
x <- data.frame(
  pop = c(8600, 47000, 45000),
  univ = c("拓殖大学", "早稲田大学", "UCLA")
)
y <- data.frame(
  univ = c("拓殖大学", "早稲田大学", "東北大学"),
  founder = c("桂太郎", "大隈重信", "日本国")
)
pop univ founder
1 8600 拓殖大学 桂太郎
2 47000 早稲田大学 大隈重信
full_join()
Functionfull_join(x, y)
preserves everything in both x and
y.
# Inputs for the full_join() example: x and y share the key column `univ`,
# and each contains one university the other lacks.
x <- data.frame(
  pop = c(8600, 47000, 45000),
  univ = c("拓殖大学", "早稲田大学", "UCLA")
)
y <- data.frame(
  univ = c("拓殖大学", "早稲田大学", "東北大学"),
  founder = c("桂太郎", "大隈重信", "日本国")
)
pop univ founder
1 8600 拓殖大学 桂太郎
2 47000 早稲田大学 大隈重信
3 45000 UCLA <NA>
4 NA 東北大学 日本国
tidy data
1. one column | = one variable |
2. one row | = one observation |
3. one cell | = one value |
4. one table | = one unit of analysis |
one column = one variable
principleparticipant number
, mos
, mc.
burger score
is split into two columns (mos
and mc
, two separate variables).burger scores
: mos
, mc
.burger score (score)
and
type of burger (burger)
.score
) should
represent burger scores.burger
) should
represent type of burger.one row = one observation
principlemos
, mc
) regarding
burger scores.score
)
regarding burger score.burger
)
regarding type of burger.one cell = one value
principleone table = one unit of analysis
principlecountry
, prefecture
,
municipality
, and
administrative district
.What we want to do here ・ 50
students are enrolled
・ 5 check tests were conducted during the spring semester
・ Not all students took every check test
・ How can we fairly calculate the check test scores?
checktest.csv
, (N=50)Data Preparation
checktest.csv
tidyverse
} package, which is necessary for
analysisDownload checktest.csv
Load the check test data and name it
df_checktest
Specify na = "."
to display missing values as
"."
df_checktest
df_check
{r, results = "asis"}
in the chunk optionsStatistic | N | Mean | St. Dev. | Min | Max |
test_1 | 43 | 80.256 | 11.146 | 60 | 100 |
test_2 | 46 | 77.804 | 11.268 | 55 | 100 |
test_3 | 42 | 80.357 | 8.647 | 60 | 100 |
test_4 | 44 | 78.182 | 10.308 | 50 | 100 |
test_5 | 43 | 80.721 | 9.407 | 55 | 100 |
・Each check test has 4 to 8 missing
values (NA
)
rowwise()
and na.rm = TRUE
→ This allows for calculating the average score while considering missing values.
# Per-student average over the five check tests.
# rowwise() makes mean() run once per row instead of over whole columns, and
# na.rm = TRUE drops missing (NA) scores before averaging.
df_check1 <- df_check1 |>
  rowwise() |>
  mutate(ave = mean(c(test_1, test_2, test_3, test_4, test_5),
                    na.rm = TRUE)) |>
  ungroup() # drop the row-wise grouping so later verbs act on the whole table
submission
) that indicates the
number of submitted check tests.df_check
into long-format
data df_check_long.
・Exclude students with missing
values in score from df_check_long.
group_by
() function to count the number of times
each student’s name appears in the name column of
df_check_long.
submission
df_check2
+ df_check_nosub
=>
df_st_list
name
) as the
key.df_st_list |>
ggplot(aes(submission, ave)) +
geom_point() +
labs(x = "Number of Check Tests Taken", y = "Average Check Test Score",
title = "Scatter Plot of Number of Check Tests Taken and Average Score") +
stat_smooth(method = lm) + # (method = lm, se = FALSE) → The 95% confidence interval disappears
geom_text(aes(y = ave + 0.2,
label = name),
size = 4,
vjust = 0)
Grading Criteria ・Final
Exam(60%)
・Assignment (40%)
R01_exam_score.csv, (N=100)
R01_hw.csv, (N=500)
.Data Preparation
R01_exam_score.csv
{tidyverse}
packageR01_exam_score.csv
to your computer.df_exam
.na = "."
to indicate missing values
as “.”.df_exam
using DT packagedf_exam
by using
summary()
function. name score
Length:100 Min. : 45.00
Class :character 1st Qu.: 68.00
Mode :character Median : 74.00
Mean : 75.23
3rd Qu.: 84.00
Max. :100.00
NA's :9
# Histogram of exam scores (rows with a missing score removed), with a
# vertical magenta line drawn at the mean score.
df_exam |>
  filter(!is.na(score)) |>
  ggplot() +
  geom_histogram(
    aes(x = score),
    color = "white",
    binwidth = 10,
    boundary = 0
  ) +
  labs(x = "Exam score", y = "Number of Students") +
  geom_vline(
    # score contains missing values, so the mean needs na.rm = TRUE
    xintercept = mean(df_exam$score, na.rm = TRUE),
    col = "magenta"
  )
R01_hw.csv
R01_hw.csv
to your computerdf_hw
.na = "."
to indicate missing values
as “.”.提出 = submitted
,
未提出 = not submitted
We can see whether 100 students submitted their homework five
times.
Since it’s the result of 100 students submitting homework five times, N = 500.
df_exam
is N = 100, df_hw
is N =
500.group_by()
function to group name and home work
in df_hw
and calculate the total number of
submissions.submission
.submission
displays not only the number of times
homework was submitted but also the number of times it was not
submitted.filter()
function to hide it.hw
is also unnecessary, so use the
select()
function to hide it.df_hw %>%
filter(!is.na(submission)) %>% # 欠損値処理
ggplot() +
geom_histogram(aes(x = submission), color = "white",
binwidth = 1, boundary = 0) +
labs(x = "Number of Submissions", y = "Number of Students") +
geom_vline(xintercept = mean(df_hw$submission,
na.rm = T),
col = "yellow",
linetype = "dotted") # Draw a vertical dotted line in yellow color at the average value
summary()
function to display accurate
descriptive statistics. Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 3.000 4.000 3.652 4.000 5.000
df_exam
and
df_hw
) that are necessary for assigning grades.left_join()
right_join()
inner_join()
full_join()
left_join()
Grading Criteria_1 ・Final Exam
(60%)
・Homework (40%)
・Only those who took the final exam are
eligible for grading
This means that “Only those who took the final exam are eligible
for grading, regardless of whether they submitted homework.”
→ We preserve data frame x (in this case, df_exam
) and join
it with data frame y (in this case, df_hw
).
→ Use the left_join()
function.
In this case, the resulting joined data frame will have the following structure.
left_join()
is a joining method that preserves x.name
is the key variable.left_join()
includes all 100 students from x, giving
priority to x.score
and
submission
) required for grading.score
is on a scale of 100 points,
submission
is on a scale of 5 points.submission
to a scale of 100 points
(sub
).df_left <- df_left %>%
mutate(sub = submission * 100/5) %>%
select(name, score, sub) # submission を非表示にする
Grading Criteria_1 ・Final Exam
(60%)
・Homework (40%)
・Only those who took the final exam are
eligible for grading
round()
function to round it to the nearest whole number
and remove decimal places.name
and
grade.
# Final grade = 60% exam score + 40% homework score, rounded to a whole
# number. The grade is NA whenever score or sub is NA.
# Only name and grade are kept.
df_left <- df_left |>
  mutate(grade = round(score * 0.6 + sub * 0.4, digits = 0)) |>
  select(name, grade)
# Histogram of final grades in df_left (rows with a missing grade removed),
# with a dotted yellow vertical line at the mean grade.
df_left %>%
  filter(!is.na(grade)) %>%
  ggplot() +
  geom_histogram(aes(x = grade), color = "white",
                 binwidth = 10, boundary = 0) +
  labs(x = "Final Grade", y = "Number of Students") +
  geom_vline(xintercept = mean(df_left$grade,
                               na.rm = TRUE), # TRUE, not T: T can be reassigned
             col = "yellow",
             linetype = "dotted")
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
53.00 67.75 75.00 74.62 81.00 100.00 16
full_join()
Grading Criteria_2 ・Final Exam
(60%)
・Homework (40%)
・Regardless of whether homework has been
submitted or not, if a perfect score is obtained in the final exam, a
passing grade (= 60 points) will be awarded
full_join()
function is used.full_join()
includes all rows that exist in either x or
y data frames.df_exam
and df_hw
, they can
be joined without any issues, and all students from Student_1 to
Student_100 can be joined together.name
serves as the key variable.score
and submission
) have been created.score
is on a scale of 100 points,
submission
is on a 5-point scale.submission
to a 100-point
scale value (sub
).df_full <- df_full %>%
mutate(sub = submission * 100/5) %>%
select(name, score, sub) # submission を非表示にする
name score sub
Length:100 Min. : 45.00 Min. : 20.00
Class :character 1st Qu.: 68.00 1st Qu.: 60.00
Mode :character Median : 74.00 Median : 80.00
Mean : 75.23 Mean : 73.04
3rd Qu.: 84.00 3rd Qu.: 80.00
Max. :100.00 Max. :100.00
NA's :9 NA's :8
NA's = 9
).NA's = 8
).Grading Criteria_2 ・Final Exam
(60%)
・Homework (40%)
・Regardless of whether homework has been
submitted or not, if a perfect score is obtained in the final exam, a
passing grade (= 60 points) will be awarded
round()
function to round it to the nearest whole number
and remove the decimal places.name
and grade.
# Final grade = 60% exam score + 40% homework score, rounded to a whole
# number. The grade is NA whenever score or sub is NA.
# name, score, sub and grade are kept.
df_full <- df_full |>
  mutate(grade = round(score * 0.6 + sub * 0.4, digits = 0)) |>
  select(name, score, sub, grade)
# Histogram of final grades in df_full (rows with a missing grade removed),
# with a dotted yellow vertical line at the mean grade.
df_full %>%
  filter(!is.na(grade)) %>%
  ggplot() +
  geom_histogram(aes(x = grade), color = "white",
                 binwidth = 10, boundary = 0) +
  labs(x = "Final Grade", y = "Number of Students") +
  geom_vline(xintercept = mean(df_full$grade,
                               na.rm = TRUE), # TRUE, not T: T can be reassigned
             col = "yellow",
             linetype = "dotted")
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
53.00 67.75 75.00 74.62 81.00 100.00 16
inner_join()
Grading Criteria_3 ・Final exam
(60%)
・Homework (40%)
・Individuals who have never submitted homework
or have not taken the final exam will not be graded.
inner_join()
function is used in this case.inner_join()
combines rows that exist in both x and y
data frames.df_exam
and df_hw
should be joined
without issues.name
is the key variable used for
merging.inner_join()
combines only the 92 students who exist
simultaneously in both x and y.score
and submission
) have been created.score
is in a 100-point scale, while
submission
is in a 5-point scale.submission
into a 100-point scale
(sub
) to align the scales.df_final <- df_final %>%
mutate(sub = submission * 100/5) %>%
select(name, score, sub) # submission を非表示にする
Grading Criteria_3 ・Final exam
(60%)
・Homework (40%)
・Individuals who have never submitted homework
or have not taken the final exam will not be graded.
round()
function to round it to the nearest whole number
and display only the variables name
and
grade
.df_final <- df_final %>%
mutate(grade = score * 0.6 + sub * 0.4) %>%
mutate(grade = round(grade, digits = 0)) %>%
select(name, grade)
# Histogram of final grades in df_final (rows with a missing grade removed),
# with a dotted yellow vertical line at the mean grade.
df_final %>%
  filter(!is.na(grade)) %>%
  ggplot() +
  geom_histogram(aes(x = grade), color = "white",
                 binwidth = 10, boundary = 0) +
  labs(x = "Final Grade", y = "Number of Students") +
  geom_vline(xintercept = mean(df_final$grade,
                               na.rm = TRUE), # TRUE, not T: T can be reassigned
             col = "yellow",
             linetype = "dotted")
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
53.00 67.75 75.00 74.62 81.00 100.00 8
tidyr
package.pivot_longer() |
wide (cluttered data) → long (tidy data) |
pivot_wider() |
long (tidy data) → wide (cluttered data) |
separate() |
separating cell (year/month/day → year, month, day) |
pivot_longer()
Data Preparation
(mos_mc_paired.csv
) ・Download mos_mc_paired.csv
to your computer.
・Manually place the downloaded mos_mc_paired.csv
into the data
folder inside the RProject folder.
・Read the data.
pivot_longer()
function.pivot_longer()
function
# Template: reshape wide data into long (tidy) data.
# `cols` selects the COLUMNS to stack, e.g. c(mos, mc); replace the
# placeholder with your own column selection.
new_data_frame <- original_data_frame %>%
  pivot_longer(cols = columns_containing_variables,
               names_to = "Variable name for the original column names",
               values_to = "Variable name for the values of the variables")
df_long
pivot_wider()
pivot_wider()
function to reshape long type data
into wide type data.pivot_wider()
function to convert to wider
format.pivot_wider()
function
# Template: reshape long data back into wide data.
# pivot_wider() takes names_from / values_from; cols, names_to and values_to
# belong to pivot_longer().
new_data_frame <- original_data_frame %>%
  pivot_wider(names_from = "Column whose values become the new column names",
              values_from = "Column whose values fill the new columns")
df_wide
separate()
Data Preparation:
COVID19_Worldwide.csv
・Here, we use data COVID19_Worldwide.csv
independently collected from the internet, etc., by Professor Jaehyun
SONG (宋財泫) of Kansai University.
Variable Name | Details |
---|---|
ID | |
Country | Country Name |
Date | Date (Year-Month-Day) |
Confirmed_Day | Number of New COVID-19 Cases per Day |
Confirmed_Total | Total Cumulative Number of COVID-19 Cases |
Death_Day | Number of New COVID-19 Deaths per Day |
Death_Total | Total Cumulative Number of COVID-19 Deaths |
Test_Day | Number of New COVID-19 Tests per Day |
Test_Total | Total Cumulative Number of COVID-19 Tests |
# Read the worldwide COVID-19 data. guess_max = 10000 lets read_csv() look at
# up to the first 10,000 rows when deciding each column's data type.
covid_df <- read_csv(
  "data/COVID19_Worldwide.csv",
  guess_max = 10000
)
[1] "ID" "Country" "Date" "Confirmed_Day"
[5] "Confirmed_Total" "Death_Day" "Death_Total" "Test_Day"
[9] "Test_Total"
Variable Name | Details |
---|---|
ID | |
Country | Country Name |
Date | Date (Year-Month-Day) |
Confirmed_Total | Total Cumulative Number of COVID-19 Cases |
Test_Day | Number of New COVID-19 Tests per Day |
1. one column | = one variable |
2. one row | = one observation |
3. one cell | = one value |
4. one table | = one unit of analysis |
→ This data is tidy data (=long type).
→ There is no need to transform the data. - Display the descriptive
statistics of the variables in covid_df
.
{r, results = "asis"}
in the chunk
optionsStatistic | N | Mean | St. Dev. | Min | Max |
Confirmed_Total | 31,806 | 18,250.14 | 115,471.60 | 0 | 3,184,582 |
Death_Total | 31,806 | 1,039.01 | 6,565.51 | 0 | 134,094 |
separate()
function.separate()
column of
covid_df
into Year, Month, and Day.How to use separate()
function
data frame %>%
separate(col = "Variable to split",
into = c("Name of variable 1 after splitting",
"Name of variable 2 after splitting",
"Name of variable 3 after splitting", ...),
sep = "Criterion for splitting")
2020/1/22
, it is split into “2020”, “1”,
and “22” based on the criterion of “/
”.Total Number of Infections
and
Total Number of Deaths
by country for the year 2020 (from
January 22nd to July 10th).death_country <- df1 %>%
group_by(Country, Year) %>%
summarise(Death = sum(Death_Total),
Infected = sum(Confirmed_Total))
The descriptive statistics for the death_country
are
as follows:
Remember to specify {r, results = "asis"}
in the
chunk options
Statistic | N | Mean | St. Dev. | Min | Max |
Death | 186 | 177,670.60 | 782,130.30 | 0 | 8,616,010 |
Infected | 186 | 3,120,774.00 | 13,105,502.00 | 731 | 160,231,690 |
# Scatter plot of Infected (x) against Death (y), one point per row of
# death_country, with a linear fit and non-overlapping Country labels.
# Note: the label font ("HiraKakuPro-W3") differs from the theme font
# ("HiraKakuProN-W3").
plot_1 <- death_country |>
  ggplot(aes(Infected, Death)) +
  geom_point() +
  stat_smooth(method = lm) +
  ggrepel::geom_text_repel(
    aes(label = Country),
    size = 3,
    family = "HiraKakuPro-W3"
  ) +
  labs(
    x = "Total Cumulative Number of COVID-19 Cases",
    y = "Total Cumulative Number of COVID-19 Deaths"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")
plot_1
# Same scatter plot as plot_1, but with the "United States" rows removed
# before plotting.
# Note: the label font ("HiraKakuPro-W3") differs from the theme font
# ("HiraKakuProN-W3").
plot_2 <- death_country |>
  filter(Country != "United States") |>
  ggplot(aes(Infected, Death)) +
  geom_point() +
  stat_smooth(method = lm) +
  ggrepel::geom_text_repel(
    aes(label = Country),
    size = 3,
    family = "HiraKakuPro-W3"
  ) +
  labs(
    x = "Total Cumulative Number of COVID-19 Cases",
    y = "Total Cumulative Number of COVID-19 Deaths"
  ) +
  theme_bw(base_family = "HiraKakuProN-W3")
plot_2
1. Sorting Rows: arrange()
and answer the
following questions:Q1: Among the candidates of the 2021 general election, list the
top 10 candidates in descending order of the number of votes
(vote
) obtained.
Q2: Among the candidates of the 2021 general election, list the
top 10 candidates in descending order of vote share
(voteshare
).
Refer to 7.3 separate()
and answer the following
questions:
Use COVID19_Worldwide.csv data in the analysis.
Q1: Draw a scatter plot for the year 2020 (January 22nd to July
10th) with Test_Total
on the x-axis and
Confirmed_Total
on the y-axis.
Q2: Draw a scatter plot for the year 2020 (January 22nd to July
10th) with Test_Total
on the x-axis and
Confirmed_Total
on the y-axis. If there are outliers, show
a scatter plot excluding them.