code for starting fresh?
rm(list = ls()) # removes every object that ls() lists in the current environment; attached packages and options are left as they are
code for opening a file from a url (file in Excel), name it ‘fragility23’?
# Attach rio up front; import() is still called with its namespace below.
library(rio)
# Address of the Excel workbook (FSI-2023-DOWNLOAD.xlsx) hosted on GitHub.
linkGit <- "https://github.com/DACSS-Fundamentals/overview/raw/refs/heads/main/FSI-2023-DOWNLOAD.xlsx"
# Read the workbook at that address into the data frame `fragility23`.
fragility23 <- rio::import(file = linkGit)
find out column names in ‘fragility23’?
colnames(fragility23) # lists the column names of the data frame
## [1] "Country" "Year"
## [3] "Rank" "Total"
## [5] "S1: Demographic Pressures" "S2: Refugees and IDPs"
## [7] "C3: Group Grievance" "E3: Human Flight and Brain Drain"
## [9] "E2: Economic Inequality" "E1: Economy"
## [11] "P1: State Legitimacy" "P2: Public Services"
## [13] "P3: Human Rights" "C1: Security Apparatus"
## [15] "C2: Factionalized Elites" "X1: External Intervention"
Check the data types of the columns in ‘fragility23’?
str(fragility23) # compact structure: dimensions, plus the type and first values of every column
## 'data.frame': 179 obs. of 16 variables:
## $ Country : chr "Somalia" "Yemen" "South Sudan" "Congo Democratic Republic" ...
## $ Year : num 2023 2023 2023 2023 2023 ...
## $ Rank : chr "1st" "2nd" "3rd" "4th" ...
## $ Total : num 112 109 109 107 107 ...
## $ S1: Demographic Pressures : num 10 9.6 9.7 9.7 7.4 9.2 8.8 9.3 9.5 8.8 ...
## $ S2: Refugees and IDPs : num 9 9.6 10 9.8 9.1 8.6 9.6 9.5 9 7.7 ...
## $ C3: Group Grievance : num 8.7 8.8 8.6 9.4 9.1 8.3 9.3 8.1 8.1 5.5 ...
## $ E3: Human Flight and Brain Drain: num 8.6 6.4 6.5 6.4 8 8.5 7.5 6.2 7.7 8.3 ...
## $ E2: Economic Inequality : num 9.1 7.9 8.6 8.4 6.5 8.2 8.5 9.6 8.7 9.2 ...
## $ E1: Economy : num 9.5 9.9 8.6 8.1 9.6 9.6 9.3 8.2 8.4 8.9 ...
## $ P1: State Legitimacy : num 9.6 9.8 9.8 9.3 10 9.4 9.4 8.9 9.1 9.9 ...
## $ P2: Public Services : num 9.8 9.6 9.7 9.3 9 10 8.6 10 9.6 9.8 ...
## $ P3: Human Rights : num 9 9.6 8.7 9.3 9.1 8.7 9.2 9.1 8.4 8.7 ...
## $ C1: Security Apparatus : num 9.5 8.6 9.9 8.8 9.4 9.7 8.3 8 8.7 6.8 ...
## $ C2: Factionalized Elites : num 10 9.9 9.2 9.6 9.9 8.7 9.6 9.4 9.5 9.7 ...
## $ X1: External Intervention : num 9.1 9.2 9.2 9.1 10 7.7 8.1 9.4 7.9 9.6 ...
show me the first 10 rows?
head(fragility23, n = 10L) # prints the first ten rows
## Country Year Rank Total S1: Demographic Pressures
## 1 Somalia 2023 1st 111.9 10.0
## 2 Yemen 2023 2nd 108.9 9.6
## 3 South Sudan 2023 3rd 108.5 9.7
## 4 Congo Democratic Republic 2023 4th 107.2 9.7
## 5 Syria 2023 5th 107.1 7.4
## 6 Afghanistan 2023 6th 106.6 9.2
## 7 Sudan 2023 7th 106.2 8.8
## 8 Central African Republic 2023 8th 105.7 9.3
## 9 Chad 2023 9th 104.6 9.5
## 10 Haiti 2023 10th 102.9 8.8
## S2: Refugees and IDPs C3: Group Grievance E3: Human Flight and Brain Drain
## 1 9.0 8.7 8.6
## 2 9.6 8.8 6.4
## 3 10.0 8.6 6.5
## 4 9.8 9.4 6.4
## 5 9.1 9.1 8.0
## 6 8.6 8.3 8.5
## 7 9.6 9.3 7.5
## 8 9.5 8.1 6.2
## 9 9.0 8.1 7.7
## 10 7.7 5.5 8.3
## E2: Economic Inequality E1: Economy P1: State Legitimacy P2: Public Services
## 1 9.1 9.5 9.6 9.8
## 2 7.9 9.9 9.8 9.6
## 3 8.6 8.6 9.8 9.7
## 4 8.4 8.1 9.3 9.3
## 5 6.5 9.6 10.0 9.0
## 6 8.2 9.6 9.4 10.0
## 7 8.5 9.3 9.4 8.6
## 8 9.6 8.2 8.9 10.0
## 9 8.7 8.4 9.1 9.6
## 10 9.2 8.9 9.9 9.8
## P3: Human Rights C1: Security Apparatus C2: Factionalized Elites
## 1 9.0 9.5 10.0
## 2 9.6 8.6 9.9
## 3 8.7 9.9 9.2
## 4 9.3 8.8 9.6
## 5 9.1 9.4 9.9
## 6 8.7 9.7 8.7
## 7 9.2 8.3 9.6
## 8 9.1 8.0 9.4
## 9 8.4 8.7 9.5
## 10 8.7 6.8 9.7
## X1: External Intervention
## 1 9.1
## 2 9.2
## 3 9.2
## 4 9.1
## 5 10.0
## 6 7.7
## 7 8.1
## 8 9.4
## 9 7.9
## 10 9.6
show me the last 10 rows?
tail(fragility23, n = 10L) # prints the last ten rows
## Country Year Rank Total S1: Demographic Pressures
## 170 Sweden 2023 170th 20.6 3.0
## 171 Luxembourg 2023 172nd 19.5 2.4
## 172 Ireland 2023 171st 19.5 2.8
## 173 Canada 2023 173rd 18.9 1.2
## 174 Denmark 2023 174th 17.9 2.3
## 175 Switzerland 2023 175th 17.8 2.4
## 176 New Zealand 2023 176th 16.7 1.1
## 177 Finland 2023 177th 16.0 1.7
## 178 Iceland 2023 178th 15.7 1.5
## 179 Norway 2023 179th 14.5 1.4
## S2: Refugees and IDPs C3: Group Grievance E3: Human Flight and Brain Drain
## 170 3.7 2.3 0.6
## 171 2.8 1.5 1.7
## 172 1.6 0.5 2.5
## 173 2.0 2.0 0.7
## 174 3.0 3.1 1.0
## 175 3.2 2.1 1.0
## 176 1.2 2.0 1.6
## 177 1.9 0.3 1.5
## 178 1.5 0.5 1.6
## 179 1.7 3.1 0.7
## E2: Economic Inequality E1: Economy P1: State Legitimacy
## 170 2.3 1.3 0.5
## 171 1.8 2.4 0.3
## 172 1.8 1.7 0.5
## 173 2.5 1.4 0.4
## 174 1.8 1.0 0.3
## 175 2.4 1.6 0.3
## 176 2.6 2.6 0.5
## 177 1.6 2.7 0.4
## 178 1.5 2.6 0.4
## 179 1.4 1.4 0.4
## P2: Public Services P3: Human Rights C1: Security Apparatus
## 170 1.0 1.5 2.1
## 171 1.3 1.1 0.4
## 172 1.9 1.6 2.1
## 173 1.7 1.9 2.2
## 174 1.7 0.6 1.1
## 175 1.6 0.4 1.4
## 176 1.1 0.5 1.6
## 177 1.0 0.5 2.0
## 178 0.9 0.4 0.4
## 179 1.0 0.4 1.4
## C2: Factionalized Elites X1: External Intervention
## 170 1.8 0.5
## 171 3.4 0.4
## 172 1.5 1.0
## 173 2.5 0.4
## 174 1.4 0.6
## 175 1.0 0.4
## 176 1.4 0.5
## 177 1.4 1.0
## 178 1.8 2.6
## 179 1.1 0.5
keep some columns: Country, Total, S1: Demographic Pressures, P1: State Legitimacy, E2: Economic Inequality into object ‘frag23_sub’
# Prints columns 5, 11 and 9 (S1, P1 and E2, in that order) selected by
# position. Nothing is assigned here, and Country and Total are not included.
fragility23[, c(5L, 11L, 9L)]
## S1: Demographic Pressures P1: State Legitimacy E2: Economic Inequality
## 1 10.0 9.6 9.1
## 2 9.6 9.8 7.9
## 3 9.7 9.8 8.6
## 4 9.7 9.3 8.4
## 5 7.4 10.0 6.5
## 6 9.2 9.4 8.2
## 7 8.8 9.4 8.5
## 8 9.3 8.9 9.6
## 9 9.5 9.1 8.7
## 10 8.8 9.9 9.2
## 11 9.8 8.2 7.5
## 12 7.0 9.3 7.3
## 13 8.8 8.6 7.2
## 14 8.8 9.7 7.5
## 15 9.6 8.2 8.1
## 16 8.7 8.9 7.8
## 17 6.0 9.6 6.4
## 18 7.3 6.4 4.9
## 19 7.7 9.7 7.9
## 20 8.7 9.1 7.2
## 21 8.3 7.4 8.3
## 22 9.6 7.1 9.2
## 23 9.0 8.8 7.4
## 24 8.9 6.6 7.8
## 25 5.8 7.7 6.6
## 26 9.2 8.2 7.4
## 27 8.2 8.3 5.7
## 28 8.5 9.1 7.5
## 29 6.5 9.6 6.9
## 30 7.2 8.0 6.0
## 31 8.9 9.1 8.9
## 32 8.3 7.4 4.9
## 33 8.4 6.2 7.5
## 34 8.3 9.1 4.9
## 35 8.1 7.3 7.4
## 36 8.6 7.5 7.5
## 37 8.6 7.8 7.2
## 38 7.3 9.8 7.3
## 39 9.3 8.1 8.8
## 40 6.8 9.8 4.5
## 41 7.7 7.5 5.9
## 42 9.1 9.9 7.6
## 43 9.6 7.5 7.8
## 44 7.2 6.7 7.8
## 45 8.2 8.0 7.7
## 46 7.1 8.8 7.2
## 47 7.4 7.6 8.1
## 48 9.4 6.7 9.1
## 49 9.6 6.7 9.2
## 50 6.7 8.6 4.5
## 51 8.4 5.5 7.5
## 52 4.5 7.3 6.6
## 53 4.8 9.1 5.2
## 54 6.4 9.1 6.4
## 55 8.1 6.6 5.3
## 56 7.1 6.2 7.8
## 57 6.9 6.8 6.7
## 58 8.7 8.8 7.8
## 59 7.8 5.3 6.4
## 60 7.4 5.7 8.2
## 61 7.9 6.8 4.7
## 62 5.0 9.1 6.7
## 63 8.7 4.9 5.9
## 64 7.1 6.8 6.5
## 65 8.6 6.9 7.3
## 66 8.6 4.9 7.9
## 67 8.2 6.4 5.9
## 68 6.3 6.9 4.4
## 69 5.7 8.2 4.9
## 70 6.4 8.9 5.3
## 71 8.7 7.1 6.8
## 72 7.0 9.4 4.3
## 73 8.5 4.5 5.8
## 74 8.0 5.3 7.8
## 75 7.6 7.3 6.2
## 76 3.8 9.2 4.5
## 77 6.1 6.4 3.9
## 78 8.0 6.1 6.9
## 79 4.7 8.0 4.4
## 80 7.4 3.7 6.3
## 81 6.9 6.4 7.6
## 82 4.6 6.1 5.0
## 83 5.0 7.6 5.2
## 84 4.8 9.4 3.3
## 85 6.9 6.3 5.7
## 86 6.9 3.8 6.7
## 87 6.7 6.4 6.2
## 88 5.8 4.1 6.8
## 89 6.8 5.2 5.1
## 90 4.4 6.8 4.8
## 91 6.1 7.5 4.3
## 92 4.1 5.6 4.3
## 93 3.8 6.3 2.8
## 94 5.4 4.9 3.6
## 95 4.3 9.0 5.3
## 96 5.4 2.8 4.3
## 97 4.3 5.7 4.1
## 98 7.0 4.7 4.4
## 99 6.7 8.1 5.8
## 100 4.3 7.6 4.8
## 101 4.7 4.1 3.7
## 102 4.1 8.0 5.7
## 103 6.5 8.1 6.1
## 104 5.6 9.8 5.6
## 105 5.7 6.4 6.9
## 106 5.2 6.9 2.6
## 107 7.4 3.2 6.6
## 108 4.8 4.2 5.3
## 109 5.8 4.4 3.9
## 110 6.4 5.4 4.7
## 111 4.1 8.4 2.6
## 112 8.1 3.1 6.9
## 113 4.2 4.5 4.3
## 114 6.2 3.5 5.0
## 115 5.0 3.4 3.6
## 116 5.5 4.0 4.7
## 117 5.9 7.6 3.8
## 118 4.4 8.0 3.5
## 119 4.1 3.9 2.4
## 120 3.4 4.2 4.5
## 121 4.1 5.0 2.9
## 122 7.8 2.5 6.8
## 123 4.1 5.8 3.2
## 124 5.1 6.5 3.5
## 125 2.8 7.7 6.9
## 126 3.7 3.6 5.1
## 127 4.1 3.7 4.6
## 128 4.2 3.5 5.3
## 129 4.5 4.4 4.0
## 130 4.1 3.5 4.7
## 131 5.4 3.5 4.7
## 132 4.8 4.0 5.2
## 133 4.4 7.2 4.8
## 134 6.3 1.8 4.7
## 135 3.5 6.2 2.6
## 136 4.9 3.7 6.0
## 137 4.2 7.2 4.1
## 138 4.8 2.3 2.9
## 139 5.7 3.4 5.3
## 140 3.7 1.2 5.0
## 141 5.2 4.2 4.0
## 142 3.8 3.7 2.4
## 143 4.8 3.3 3.6
## 144 3.3 6.3 3.5
## 145 3.2 1.8 3.7
## 146 4.6 2.0 2.9
## 147 5.9 4.4 4.9
## 148 3.4 3.6 3.7
## 149 3.0 6.4 5.3
## 150 2.9 1.2 5.1
## 151 3.2 3.9 1.9
## 152 3.6 1.2 4.1
## 153 2.9 1.0 3.0
## 154 3.1 2.3 3.1
## 155 2.6 3.2 2.2
## 156 3.6 6.4 3.1
## 157 3.4 0.4 3.3
## 158 3.7 2.9 2.3
## 159 2.5 2.4 2.8
## 160 4.2 0.8 2.2
## 161 5.9 0.3 2.9
## 162 3.1 1.0 2.9
## 163 3.6 1.4 2.7
## 164 4.0 0.8 2.4
## 165 2.5 3.7 3.4
## 166 2.3 0.7 2.7
## 167 3.6 0.6 2.5
## 168 3.8 0.4 2.6
## 169 2.5 0.3 1.8
## 170 3.0 0.5 2.3
## 171 2.4 0.3 1.8
## 172 2.8 0.5 1.8
## 173 1.2 0.4 2.5
## 174 2.3 0.3 1.8
## 175 2.4 0.3 2.4
## 176 1.1 0.5 2.6
## 177 1.7 0.4 1.6
## 178 1.5 0.4 1.5
## 179 1.4 0.4 1.4
# Dry run of the column search: the pattern is a regular expression
# (fixed = FALSE) and value = TRUE returns the matching column names
# themselves instead of their positions. TRUE/FALSE are spelled out because
# T and F are ordinary variables that can be reassigned.
grep(pattern = "Country|S1|P1|E2|Total", x = names(fragility23),
     fixed = FALSE, value = TRUE)
## [1] "Country" "Total"
## [3] "S1: Demographic Pressures" "E2: Economic Inequality"
## [5] "P1: State Legitimacy"
# Names of the columns to retain, in the order they occur in fragility23.
# TRUE/FALSE are spelled out because T and F can be reassigned.
keep <- grep("Country|S1|P1|E2|Total", names(fragility23),
             fixed = FALSE, value = TRUE)
# Subset the data frame to those columns; drop = FALSE guarantees a data
# frame comes back even if the pattern were to match a single column.
frag23_sub <- fragility23[, keep, drop = FALSE]
rename ‘frag23_sub’ columns, keep var codes only
# Keep only the variable code of each column by removing everything from the
# first colon onward ("S1: Demographic Pressures" -> "S1"). Names without a
# colon ("Country", "Total") are left unchanged. Deriving the codes from the
# names themselves avoids mislabelling columns if their order ever differs
# from the assumed S1, E2, P1.
names(frag23_sub) <- sub(":.*$", "", names(frag23_sub))
get me the top ten best countries on the ‘E2’ measure from ‘frag23_sub’
# Option 1: sort rows by E2 from highest to lowest, then take the last ten,
# i.e. the ten lowest (best) E2 scores, with all columns shown.
tail(frag23_sub[order(-frag23_sub[["E2"]]), ], n = 10L)
## Country Total S1 E2 P1
## 155 Slovak Republic 37.8 2.6 2.2 3.2
## 160 Belgium 31.4 4.2 2.2 0.8
## 151 Czech Republic 40.2 3.2 1.9 3.9
## 169 Netherlands 21.0 2.5 1.8 0.3
## 171 Luxembourg 19.5 2.4 1.8 0.3
## 172 Ireland 19.5 2.8 1.8 0.5
## 174 Denmark 17.9 2.3 1.8 0.3
## 177 Finland 16.0 1.7 1.6 0.4
## 178 Iceland 15.7 1.5 1.5 0.4
## 179 Norway 14.5 1.4 1.4 0.4
# Option 2: same ordering as Option 1, but only the country names are returned.
tail(frag23_sub[["Country"]][order(-frag23_sub[["E2"]])], n = 10L)
## [1] "Slovak Republic" "Belgium" "Czech Republic" "Netherlands"
## [5] "Luxembourg" "Ireland" "Denmark" "Finland"
## [9] "Iceland" "Norway"
give the statistical description of “frag23_sub”
summary(frag23_sub) # per-column summary: min, quartiles, mean and max for numeric columns; length/class/mode for Country
## Country Total S1 E2
## Length:179 Min. : 14.50 Min. : 1.100 Min. :1.400
## Class :character 1st Qu.: 49.00 1st Qu.: 4.100 1st Qu.:3.650
## Mode :character Median : 68.20 Median : 5.900 Median :5.200
## Mean : 65.83 Mean : 5.956 Mean :5.323
## 3rd Qu.: 82.20 3rd Qu.: 8.050 3rd Qu.:7.200
## Max. :111.90 Max. :10.000 Max. :9.600
## P1
## Min. : 0.300
## 1st Qu.: 3.650
## Median : 6.400
## Mean : 5.741
## 3rd Qu.: 8.100
## Max. :10.000
the value of the worst quartile in Total
# 75th percentile of Total. Higher totals are worse, so values at or above
# this cut-off make up the worst quartile.
q3_Total <- quantile(frag23_sub$Total, probs = 0.75, na.rm = TRUE)
show correlations between “S1”,“E2”, “P1”
# Pairwise correlations among the three indicators. Columns are picked by
# name, not by excluding positions 1 and 2, so columns added to frag23_sub
# later cannot leak into the matrix when this line is re-run.
cor(frag23_sub[, c("S1", "E2", "P1")])
## S1 E2 P1
## S1 1.0000000 0.8537425 0.6571171
## E2 0.8537425 1.0000000 0.6661759
## P1 0.6571171 0.6661759 1.0000000
library(corrtable)
# Same correlations as cor(), formatted as text with asterisks marking
# statistical significance. Columns are selected by name so that columns
# added to frag23_sub later are not included on a re-run.
corrtable::correlation_matrix(df = frag23_sub[, c("S1", "E2", "P1")])
## S1 E2 P1
## S1 "1.000 " "0.854***" "0.657***"
## E2 "0.854***" "1.000 " "0.666***"
## P1 "0.657***" "0.666***" "1.000 "
regress S1 on P1 and E2
# Fit and print a linear model with S1 as the response and P1 and E2 as
# predictors; only the call and coefficients are shown.
lm(formula = S1 ~ P1 + E2, data = frag23_sub)
##
## Call:
## lm(formula = S1 ~ P1 + E2, data = frag23_sub)
##
## Coefficients:
## (Intercept) P1 E2
## 0.8536 0.1248 0.8239
# Store the fitted model so it can be summarised and plotted later.
model <- lm(formula = S1 ~ P1 + E2, data = frag23_sub)
# Full report: residual quantiles, coefficient table, R-squared and F-test.
summary(object = model)
##
## Call:
## lm(formula = S1 ~ P1 + E2, data = frag23_sub)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.6991 -0.8670 0.1260 0.7808 2.6197
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.85359 0.24234 3.522 0.000545 ***
## P1 0.12477 0.04024 3.100 0.002250 **
## E2 0.82389 0.05645 14.594 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.162 on 176 degrees of freedom
## Multiple R-squared: 0.7429, Adjusted R-squared: 0.74
## F-statistic: 254.3 on 2 and 176 DF, p-value: < 2.2e-16
some plotting
give me a plot for the ‘P1’ variable
# Histogram of the P1 scores. The `frag23_sub$P1` expression is kept as
# written because the default title and axis label are built from it.
hist(frag23_sub$P1)
visual correlation between S1 and E2
plot(frag23_sub$S1, frag23_sub$E2) # scatter plot of E2 (vertical axis) against S1 (horizontal axis)
Color points if country is on worst quartile of total.
frag23_sub$Total >= q3_Total # TRUE for each country whose Total is at or above the 75th percentile
## [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [13] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [25] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [37] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# Store the worst-quartile flag (TRUE/FALSE) as a new column.
frag23_sub$worstQt <- frag23_sub$Total >= q3_Total
# Scatter plot of S1 against E2 with solid points (pch = 20). The flag is
# converted to a factor so that FALSE and TRUE pick the first and second
# colours of the current palette.
plot(frag23_sub$S1, frag23_sub$E2,
     pch = 20,
     col = factor(frag23_sub$worstQt))
visual of the regression of S1 on P1 and E2
library(sjPlot)
plot_models(model) # draws the fitted regression stored in `model` (S1 ~ P1 + E2) with sjPlot; presumably a plot of the P1 and E2 estimates - confirm against the rendered figure
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the sjPlot package.
## Please report the issue at <https://github.com/strengejacke/sjPlot/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Ignoring unknown labels:
## • shape : "p-level"