library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(cowplot)
library(forcats)
library(knitr)
# Load the survey export; blank cells are read in as NA.
survey <- read.csv("mt_uk_survey_data.csv", na.strings = "")

# Education ----
colnames(survey)[3] <- "education"
# Give the 28 unanswered cells an explicit "(Missing)" level so they are
# counted rather than dropped.
survey$education <- fct_explicit_na(survey$education)
edu <- count(survey, education)
edu$pc <- round((edu$n / sum(edu$n)) * 100, 1)
# Horizontal bar chart of counts, each bar labelled with its percentage share.
ed <- ggplot(edu, aes(x = reorder(education, n), y = n)) +
  geom_col(width = 0.8, fill = "#E69F00") +
  ylim(0, 515) +
  geom_text(aes(label = paste0(pc, "%")), hjust = 0, size = 3.2) +
  ylab("Count") +
  xlab("") +
  coord_flip() +
  ggtitle("Education") +
  theme(plot.title = element_text(size = 9))

# Employment ----
# Recode the two non-answers -- "0" (12 cells) and "DATA EXPIRED" (3 cells) --
# to "(Missing)". Whole cell values are compared, so a genuine answer that
# merely contains one of these strings is never rewritten.
employment <- as.character(survey$employment_status)
employment[employment %in% c("0", "DATA EXPIRED")] <- "(Missing)"
survey$employment_status <- employment
emp <- count(survey, employment_status)
emp$pc <- round((emp$n / sum(emp$n)) * 100, 1)
em <- ggplot(emp, aes(x = reorder(employment_status, n), y = n)) +
  geom_col(width = 0.8, fill = "#56B4E9") +
  # `limits` is spelled out in full instead of relying on partial matching.
  scale_y_continuous(
    limits = c(0, 570),
    breaks = c(0, 100, 200, 300, 400, 500)
  ) +
  geom_text(aes(label = paste0(pc, "%")), hjust = 0, size = 3.2) +
  ylab("Count") +
  xlab("") +
  coord_flip() +
  ggtitle("Employment") +
  theme(plot.title = element_text(size = 9))

# Stack the two demographic charts in a single column and display them.
demo_plot <- plot_grid(ed, em, align = "v", ncol = 1)
demo_plot
# Students (as per Prolific details) ----
# Recode the "0" placeholder (6 cells) to "(Missing)". Only cells whose whole
# value is "0" are recoded, so other answers are never partially rewritten.
student <- as.character(survey$student)
student[student %in% "0"] <- "(Missing)"
survey$student <- student
# Tally each answer, add its percentage share, and list the most common first.
students <- count(survey, student)
students$pc <- round((students$n / sum(students$n)) * 100, 1)
students <- arrange(students, -n)
kable(students, row.names = FALSE, col.names = c("Student", "n", "%"))
Student | n | % |
---|---|---|
No | 1052 | 87.7 |
Yes | 142 | 11.8 |
(Missing) | 6 | 0.5 |
# Native language ----
colnames(survey)[4] <- "native_lang"
# Treat the 7 blank cells and the one invalid answer ("HE Administration") as
# "(Missing)". The column is stored as plain character afterwards.
native_lang <- as.character(fct_explicit_na(survey$native_lang))
native_lang[native_lang == "HE Administration"] <- "(Missing)"
survey$native_lang <- native_lang

# No respondent is dropped: missing answers are reported as their own row, so
# every percentage below uses the full sample as its base.
native.language <- survey

# English is recognised by name, by common misspellings, by the abbreviation
# "En", or by the answer "British".
english_pattern <- "English|British|Engish|Engligh|englsh|Enlish|Ennglish|\\<En\\>"
is_english <- grepl(english_pattern, native.language$native_lang, ignore.case = TRUE)
# Missing answers are identified by exact comparison; "(Missing)" used as a
# regular expression would treat the parentheses as a group, not as literals.
is_missing <- native.language$native_lang == "(Missing)"

en.native <- sum(is_english)
en.native.pc <- round((en.native / nrow(native.language)) * 100, 1)
non.en.native <- sum(!is_english & !is_missing)
non.en.native.pc <- round((non.en.native / nrow(native.language)) * 100, 1)
missing <- sum(is_missing)
missing.pc <- round((missing / nrow(native.language)) * 100, 1)

Native_speaker_of_English <- c("Yes", "No", "(Missing)")
n <- c(en.native, non.en.native, missing)
pc <- c(en.native.pc, non.en.native.pc, missing.pc)
lang.native <- data.frame(Native_speaker_of_English, n, pc)
kable(
  lang.native,
  row.names = FALSE,
  col.names = c("Native speaker of English?", "n", "%")
)
Native speaker of English? | n | % |
---|---|---|
Yes | 1132 | 94.3 |
No | 60 | 5.0 |
(Missing) | 8 | 0.7 |
# Other languages (at least basic knowledge) ----
colnames(survey)[5] <- "language_yn"
# Give the 8 unanswered cells an explicit "(Missing)" level so they are tabulated.
survey$language_yn <- fct_explicit_na(survey$language_yn)
# Tally each answer, add its percentage share, and list the most common first.
language <- survey %>%
  count(language_yn) %>%
  mutate(pc = round((n / sum(n)) * 100, 1)) %>%
  arrange(-n)
kable(
  language,
  row.names = FALSE,
  col.names = c(
    "Are there other languages you know at least a little (for example, enough to read a restaurant menu)?",
    "n",
    "%"
  )
)
Are there other languages you know at least a little (for example, enough to read a restaurant menu)? | n | % |
---|---|---|
Yes | 724 | 60.3 |
No | 468 | 39.0 |
(Missing) | 8 | 0.7 |
# French ----
colnames(survey)[6] <- "other_languages"
# Flag respondents whose free-text list of other languages mentions French,
# allowing for two misspellings and the stand-alone abbreviation "Fr".
mentions_french <- grepl(
  "French|Frech|Franch|\\<Fr\\>",
  survey$other_languages,
  ignore.case = TRUE
)
lang.nonnative.fr <- sum(mentions_french)
# The percentage base is the whole sample, not only those who answered.
lang.nonnative.fr.pc <- round((lang.nonnative.fr / nrow(survey)) * 100, 1)
n <- lang.nonnative.fr
pc <- lang.nonnative.fr.pc
lang.French <- data.frame(n, pc)
kable(
  lang.French,
  row.names = FALSE,
  col.names = c("Respondents with some French", "%")
)
Respondents with some French | % |
---|---|
457 | 38.1 |
# Closed-ended results ----
## Used MT before
colnames(survey)[8] <- "MTuse_yn"
# One respondent selected "yes" only because of a technical error (see their
# free-text comment below); set their answer to "no" (0) in a working copy.
MTuse <- survey
froze <- which(MTuse$Other.3 == "A tried to answer NO but it kept freezing")
MTuse$MTuse_yn[froze] <- 0
# Tally the answers, add percentage shares, and label 0 as "No", anything else as "Yes".
MTyn <- MTuse %>%
  count(MTuse_yn) %>%
  mutate(
    pc = round((n / sum(n)) * 100, 1),
    MTuse_yn = ifelse(MTuse_yn == 0, "No", "Yes")
  )
kable(
  MTyn,
  row.names = FALSE,
  col.names = c("Have you used automatic translators before?", "n", "%")
)
Have you used automatic translators before? | n | % |
---|---|---|
No | 289 | 24.1 |
Yes | 911 | 75.9 |
# Keep only respondents who had used MT before (drops the 289 "no" answers).
MTy <- MTuse[which(MTuse$MTuse_yn != 0), ]
MTy_n <- nrow(MTy) # 911

# MT systems and/or interfaces ----
# The different options reflect how users normally came into contact with the
# tools. The underlying systems will in some cases be the same.
# A system counts as selected when its column holds any non-NA value.
selected <- c(
  "Google Translate" = sum(!is.na(MTy$Google.Translate)),
  "Microsoft Translator" = sum(!is.na(MTy$Bing.or.Microsoft.Translator)),
  "DeepL" = sum(!is.na(MTy$DeepL)),
  "iTranslate" = sum(!is.na(MTy$iTranslate)),
  "Facebook" = sum(!is.na(MTy$The.default.automatic.translator.in.Facebook)),
  "Twitter" = sum(!is.na(MTy$The.default.automatic.translator.in.Twitter)),
  "Microsoft Word" = sum(!is.na(MTy$The.default.automatic.translator.in.Microsoft.Word)),
  "Google Chrome" = sum(!is.na(MTy$The.default.automatic.translator.in.Google.Chrome)),
  "Internet Explorer" = sum(!is.na(MTy$The.default.automatic.translator.in.Internet.Explorer)),
  "TranslateAll" = sum(!is.na(MTy$TranslateAll)),
  "Skype" = sum(!is.na(MTy$Skype.Translator)),
  "Reverso" = sum(!is.na(MTy$Reverso)),
  "Systran" = sum(!is.na(MTy$Systran)),
  "PROMT" = sum(!is.na(MTy$PROMT)),
  "Freetranslation.com" = sum(!is.na(MTy$FreeTranslation.com)),
  "Yandex" = sum(!is.na(MTy$Yandex)),
  "Babelfish" = sum(!is.na(MTy$BabelFish)),
  "Yahoo/Babel Fish" = sum(!is.na(MTy$Yahoo..Babel.Fish.or.Altavista.Translation.Service)),
  "Google Assistant" = sum(!is.na(MTy$Google.Assistant)),
  "Other" = sum(!is.na(MTy$Other.3))
)
systems.interfaces <- names(selected)
systems.counts <- unname(selected)
# Share of MT users selecting each system; several can be selected per person.
systems.pc <- round((systems.counts / MTy_n) * 100, 1)
systems_df <- data.frame(systems.interfaces, systems.counts, systems.pc)
systems_df <- systems_df[order(-systems.counts), ]
kable(systems_df, row.names = FALSE, col.names = c("Systems/interfaces", "n", "%"))
Systems/interfaces | n | % |
---|---|---|
Google Translate | 867 | 95.2 |
Google Chrome | 227 | 24.9 |
Facebook | 207 | 22.7 |
Babelfish | 122 | 13.4 |
Twitter | 90 | 9.9 |
Google Assistant | 62 | 6.8 |
Microsoft Translator | 46 | 5.0 |
Internet Explorer | 29 | 3.2 |
Microsoft Word | 21 | 2.3 |
iTranslate | 18 | 2.0 |
Yahoo/Babel Fish | 18 | 2.0 |
Other | 11 | 1.2 |
Freetranslation.com | 8 | 0.9 |
Reverso | 7 | 0.8 |
DeepL | 5 | 0.5 |
Skype | 5 | 0.5 |
Yandex | 5 | 0.5 |
Systran | 3 | 0.3 |
PROMT | 2 | 0.2 |
TranslateAll | 0 | 0.0 |
## Use context
# Each option has its own column: a tick is stored as the option's label (or,
# for "Other", as any non-NA entry) and an unticked option as NA.
leisure <- sum(MTy$Leisure.1 == "Leisure", na.rm = TRUE)
study <- sum(MTy$Study.1 == "Study", na.rm = TRUE)
work <- sum(MTy$Work.1 == "Work", na.rm = TRUE)
other <- sum(!is.na(MTy$Other.4))
# Respondents who left all four options empty.
missing.context <- sum(
  is.na(MTy$Leisure.1) &
    is.na(MTy$Study.1) &
    is.na(MTy$Work.1) &
    is.na(MTy$Other.4)
)
contexts <- c("Leisure", "Study", "Work", "Other", "No response")
contexts_counts <- c(leisure, study, work, other, missing.context)
contexts_pc <- round((contexts_counts / MTy_n) * 100, 1)
contexts_df <- data.frame(contexts, contexts_counts, contexts_pc)
contexts_df <- contexts_df[order(-contexts_counts), ]
# Multiple choices were allowed, so the percentages can sum to more than 100.
kable(
  contexts_df,
  row.names = FALSE,
  col.names = c("Please select why you used automatic translators", "n", "%")
)
Please select why you used automatic translators | n | % |
---|---|---|
Leisure | 730 | 80.1 |
Work | 253 | 27.8 |
Study | 208 | 22.8 |
Other | 54 | 5.9 |
No response | 13 | 1.4 |
## Location of use
# A ticked option is stored as its label; an unticked one is NA.
abroad <- sum(MTy$Abroad.1 == "Abroad", na.rm = TRUE)
uk <- sum(MTy$In.the.UK.1 == "In the UK", na.rm = TRUE)
# Respondents who ticked neither location.
missing.loc <- sum(is.na(MTy$Abroad.1) & is.na(MTy$In.the.UK.1))
location <- c("Abroad", "UK", "No response")
location_counts <- c(abroad, uk, missing.loc)
location_pc <- round((location_counts / MTy_n) * 100, 1)
location_df <- data.frame(location, location_counts, location_pc)
location_df <- location_df[order(-location_counts), ]
# Multiple choices were allowed, so the percentages can sum to more than 100.
kable(location_df, row.names = FALSE, col.names = c("MT use location", "n", "%"))
MT use location | n | % |
---|---|---|
UK | 832 | 91.3 |
Abroad | 295 | 32.4 |
No response | 5 | 0.5 |
## Devices
# Columns 71-76 hold one device option each, in the order of `devices_names`;
# an option counts as selected when its cell is not NA.
device_columns <- MTy[71:76]
device_selected <- unname(
  vapply(device_columns, function(column) sum(!is.na(column)), integer(1))
)
# Respondents with no device selected at all.
missing.device <- sum(rowSums(!is.na(device_columns)) == 0)
devices_names <- c(
  "Desktop or laptop computer",
  "Mobile phone",
  "Smartwatch",
  "Smart speaker or smart home device",
  "Tablet",
  "Other",
  "No response"
)
devices_counts <- c(device_selected, missing.device)
devices_pc <- round((devices_counts / MTy_n) * 100, 1)
devices_df <- data.frame(devices_names, devices_counts, devices_pc)
devices_df <- devices_df[order(-devices_counts), ]
# Multiple choices were allowed, so the percentages can sum to more than 100.
kable(
  devices_df,
  row.names = FALSE,
  col.names = c("On what type of device have you used automatic translators?", "n", "%")
)
On what type of device have you used automatic translators? | n | % |
---|---|---|
Desktop or laptop computer | 721 | 79.1 |
Mobile phone | 555 | 60.9 |
Tablet | 182 | 20.0 |
Smart speaker or smart home device | 14 | 1.5 |
Smartwatch | 5 | 0.5 |
No response | 4 | 0.4 |
Other | 1 | 0.1 |
## Use purpose and procedure
# Columns 58-70 hold one situation option each, in the order of `uses_names`;
# an option counts as selected when its cell is not NA.
uses_names <- c(
  "I needed to read a text or document in a different language",
  "I needed to send an e-mail or a message",
  "I needed urgent help or was in a situation I consider serious e.g. in hospital or at a police station",
  "I used it for play or out of curiosity",
  "I wanted to buy or sell something",
  "I was browsing the internet",
  "I was on social media or on an online forum",
  "I was speaking out loud to someone online in real time",
  "I was texting online in real time",
  "I was trying to communicate with someone in person by typing messages in the same physical space",
  "I was trying to learn a language or the meaning of something in a different language",
  "I was trying to speak out loud with someone in person in the same physical space",
  "Other"
)
uses_counts <- unname(
  vapply(MTy[58:70], function(column) sum(!is.na(column)), integer(1))
)
uses_pc <- round((uses_counts / MTy_n) * 100, 1)
uses_df <- data.frame(uses_names, uses_counts, uses_pc)
uses_df <- uses_df[order(-uses_counts), ]
# Multiple choices were allowed, so the percentages can sum to more than 100.
kable(
  uses_df,
  row.names = FALSE,
  col.names = c(
    "How would you describe the situation(s) where you used automatic translators?",
    "n",
    "%"
  )
)
How would you describe the situation(s) where you used automatic translators? | n | % |
---|---|---|
I needed to read a text or document in a different language | 620 | 68.1 |
I used it for play or out of curiosity | 369 | 40.5 |
I was browsing the internet | 341 | 37.4 |
I was trying to learn a language or the meaning of something in a different language | 259 | 28.4 |
I was on social media or on an online forum | 256 | 28.1 |
I needed to send an e-mail or a message | 212 | 23.3 |
I was trying to communicate with someone in person by typing messages in the same physical space | 111 | 12.2 |
I wanted to buy or sell something | 109 | 12.0 |
I was trying to speak out loud with someone in person in the same physical space | 73 | 8.0 |
I was texting online in real time | 66 | 7.2 |
I was speaking out loud to someone online in real time | 38 | 4.2 |
Other | 32 | 3.5 |
I needed urgent help or was in a situation I consider serious e.g. in hospital or at a police station | 19 | 2.1 |
## Perceptions of accuracy
colnames(MTy)[77] <- "accuracy"
# Unanswered cells become an explicit "(Missing)" level so they are tabulated.
MTy$accuracy <- fct_explicit_na(MTy$accuracy)
# Tally each answer with its percentage share, most frequent first.
accuracy <- MTy %>%
  count(accuracy) %>%
  mutate(pc = round((n / sum(n)) * 100, 1))
accuracy <- accuracy[order(-accuracy$n), ]
kable(
  accuracy,
  row.names = FALSE,
  col.names = c(
    "Based on your overall experience, would you say automatic translators are often:",
    "n",
    "%"
  )
)
Based on your overall experience, would you say automatic translators are often: | n | % |
---|---|---|
Accurate | 617 | 67.7 |
Inaccurate | 126 | 13.8 |
Very accurate | 114 | 12.5 |
I don’t know | 37 | 4.1 |
Very inaccurate | 12 | 1.3 |
(Missing) | 5 | 0.5 |
## Perceptions of risk
colnames(MTy)[78] <- "risk"
# Tally each answer with its percentage share, most frequent first.
risk <- MTy %>%
  count(risk) %>%
  mutate(pc = round((n / sum(n)) * 100, 1))
risk <- risk[order(-risk$n), ]
# Answers are stored numerically: 0 is shown as "No", any other value as "Yes".
risk$risk <- ifelse(risk$risk == 0, "No", "Yes")
kable(
  risk,
  row.names = FALSE,
  col.names = c(
    "Based on your overall experience, did you ever feel using automatic translators was a risk?",
    "n",
    "%"
  )
)
Based on your overall experience, did you ever feel using automatic translators was a risk? | n | % |
---|---|---|
No | 700 | 76.8 |
Yes | 211 | 23.2 |
## Perceived severity of risk
colnames(MTy)[79] <- "risk_rating"
# Tally the ratings and drop the NA row, so percentages are based only on
# respondents who gave a rating.
risk_rating_df <- na.exclude(count(MTy, risk_rating))
risk_rating_df$pc <- with(risk_rating_df, round((n / sum(n)) * 100, 1))
risk_rating_df <- risk_rating_df[order(-risk_rating_df$n), ]
kable(
  risk_rating_df,
  row.names = FALSE,
  col.names = c("How would you rate this risk", "n", "%"),
  caption = "1 = very low risk; 5 = very high risk"
)
How would you rate this risk | n | % |
---|---|---|
3 | 109 | 51.7 |
2 | 54 | 25.6 |
4 | 34 | 16.1 |
1 | 10 | 4.7 |
5 | 4 | 1.9 |
## Perceived type of risk
# Columns 80-86 hold one risk-type option each, in the order of `risk_types`;
# an option counts as selected when its cell is not NA.
risk_types <- c(
  "A risk to my personal reputation",
  "A risk to my studies",
  "Financial",
  "Legal",
  "Medical",
  "Professional",
  "Other"
)
risk_counts <- unname(
  vapply(MTy[80:86], function(column) sum(!is.na(column)), integer(1))
)
# Percentage base: respondents who gave a risk rating.
risk_n <- sum(risk_rating_df$n)
risk_pc <- round((risk_counts / risk_n) * 100, 1)
risktypes_df <- data.frame(risk_types, risk_counts, risk_pc)
risktypes_df <- risktypes_df[order(-risk_counts), ]
kable(
  risktypes_df,
  row.names = FALSE,
  col.names = c("How would you describe the type of risk?", "n", "%")
)
How would you describe the type of risk? | n | % |
---|---|---|
A risk to my personal reputation | 86 | 40.8 |
A risk to my studies | 69 | 32.7 |
Other | 54 | 25.6 |
Professional | 40 | 19.0 |
Financial | 18 | 8.5 |
Medical | 11 | 5.2 |
Legal | 10 | 4.7 |
## Use decision
colnames(MTy)[87] <- "decision"
# Unanswered cells become an explicit "(Missing)" level so they are tabulated.
MTy$decision <- fct_explicit_na(MTy$decision)
# Tally each answer with its percentage share, most frequent first.
decision <- MTy %>%
  count(decision) %>%
  mutate(pc = round((n / sum(n)) * 100, 1))
decision <- decision[order(-decision$n), ]
kable(
  decision,
  row.names = FALSE,
  col.names = c("Would you say you used automatic translators:", "n", "%")
)
Would you say you used automatic translators: | n | % |
---|---|---|
Because it served my purpose well and I wanted to use it | 698 | 76.6 |
For lack of a better alternative | 208 | 22.8 |
(Missing) | 5 | 0.5 |
## Satisfaction
colnames(MTy)[88] <- "satisfaction"
# Unanswered cells become an explicit "(Missing)" level so they are tabulated.
MTy$satisfaction <- fct_explicit_na(MTy$satisfaction)
# Tally each answer with its percentage share, most frequent first.
satisfaction <- MTy %>%
  count(satisfaction) %>%
  mutate(pc = round((n / sum(n)) * 100, 1))
satisfaction <- satisfaction[order(-satisfaction$n), ]
kable(
  satisfaction,
  row.names = FALSE,
  col.names = c(
    "Please rate how satisfied you were with the automatic translator(s) you used:",
    "n",
    "%"
  )
)
Please rate how satisfied you were with the automatic translator(s) you used: | n | % |
---|---|---|
Satisfied | 572 | 62.8 |
Very satisfied | 274 | 30.1 |
Dissatisfied | 45 | 4.9 |
I don’t know | 10 | 1.1 |
Very dissatisfied | 8 | 0.9 |
(Missing) | 2 | 0.2 |
We set aside a random sample of 100 survey entries for all four coders to code independently. The entries included responses to two questions, so 200 coding units in total. We exclude from the agreement calculation 56 empty responses and 4 responses that one coder coded by accident prior to the independent coding. A resulting sample of 140 coding units across the two questions is used for the agreement check.
There were four .txt files containing the independent coding, one for each of coders 1, 2, 3, and 4.
We compute all between-coder calculations of fuzzyKappa with the fuzzyKappa.bat file with the following arguments:
fuzzyKappa.exe file1.txt file2.txt -f 1 1 1 -norm MIN -ci 1000
###Output
coding_4 v coding_3
Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.757142857143 Expected agreement = 0.146683673469 Kappa = 0.715396113602 Bootstrap CI at 90: ( 0.645930242772 , 0.780966410031 ) Bootstrap CI at 95: ( 0.63324502706 , 0.793651625743 ) Bootstrap CI at 99: ( 0.607874595636 , 0.819022057167 )
Fuzzy Kappa: Observed agreement = 0.745238095238 Expected agreement = 0.156913265306 Kappa = 0.697822425288 Bootstrap CI at 90: ( 0.643114094777 , 0.751149657628 ) Bootstrap CI at 95: ( 0.632965299479 , 0.761298452926 ) Bootstrap CI at 99: ( 0.612667708883 , 0.781596043522 )
coding_4 v coding_1
Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.764285714286 Expected agreement = 0.154693877551 Kappa = 0.721149203283 Bootstrap CI at 90: ( 0.651697791293 , 0.786854309844 ) Bootstrap CI at 95: ( 0.639001269854 , 0.799550831283 ) Bootstrap CI at 99: ( 0.613608226975 , 0.824943874162 )
Fuzzy Kappa: Observed agreement = 0.742857142857 Expected agreement = 0.172465986395 Kappa = 0.689266117265 Bootstrap CI at 90: ( 0.630680069063 , 0.746392491872 ) Bootstrap CI at 95: ( 0.619810114193 , 0.757262446741 ) Bootstrap CI at 99: ( 0.598070204453 , 0.779002356481 )
coding_3 v coding_1
Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.807142857143 Expected agreement = 0.144132653061 Kappa = 0.774664679583 #highest crisp Kappa Bootstrap CI at 90: ( 0.71084667378 , 0.834906635056 ) Bootstrap CI at 95: ( 0.699192556206 , 0.84656075263 ) Bootstrap CI at 99: ( 0.675884321057 , 0.869868987779 )
Fuzzy Kappa: Observed agreement = 0.785714285714 Expected agreement = 0.160807823129 Kappa = 0.744652392871 Bootstrap CI at 90: ( 0.694713578252 , 0.792806170592 ) Bootstrap CI at 95: ( 0.685498819578 , 0.802020929267 ) Bootstrap CI at 99: ( 0.667069302229 , 0.820450446615 )
coding_2 v coding_1
Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.785714285714 Expected agreement = 0.158979591837 Kappa = 0.745207473914 Bootstrap CI at 90: ( 0.678192066067 , 0.812635257604 ) Bootstrap CI at 95: ( 0.665562554135 , 0.825264769536 ) Bootstrap CI at 99: ( 0.64030353027 , 0.850523793401 )
Fuzzy Kappa: Observed agreement = 0.797619047619 Expected agreement = 0.185323129252 Kappa = 0.751581320585 ##Highest level of agreement Bootstrap CI at 90: ( 0.698250590846 , 0.798034144329 ) Bootstrap CI at 95: ( 0.688876984307 , 0.807407750868 ) Bootstrap CI at 99: ( 0.670129771229 , 0.826154963947 )
coding_2 v coding_3
Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.707142857143 Expected agreement = 0.146581632653 Kappa = 0.656842231123 #lowest crisp Kappa Bootstrap CI at 90: ( 0.583668451584 , 0.726886844827 ) Bootstrap CI at 95: ( 0.570214602522 , 0.740340693889 ) Bootstrap CI at 99: ( 0.543306904398 , 0.767248392014 )
Fuzzy Kappa: Observed agreement = 0.704761904762 Expected agreement = 0.161556122449 Kappa = 0.647873753816 ##Lowest level of agreement Bootstrap CI at 90: ( 0.589658061554 , 0.704238827568 ) Bootstrap CI at 95: ( 0.578894413837 , 0.715002475284 ) Bootstrap CI at 99: ( 0.557367118404 , 0.736529770717 )
coding_2 v coding_4
Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.742857142857 Expected agreement = 0.160102040816 Kappa = 0.693840359616 Bootstrap CI at 90: ( 0.62359611456 , 0.762761519359 ) Bootstrap CI at 95: ( 0.610523000776 , 0.775834633143 ) Bootstrap CI at 99: ( 0.584376773207 , 0.801980860712 )
Fuzzy Kappa: Observed agreement = 0.741666666667 Expected agreement = 0.178273809524 Kappa = 0.685621151757 Bootstrap CI at 90: ( 0.626711496543 , 0.740913339549 ) Bootstrap CI at 95: ( 0.615983444625 , 0.751641391468 ) Bootstrap CI at 99: ( 0.594527340787 , 0.773097495305 )
## Open-ended results
# Open Question A: Please say a few words about what would make you prefer
# automatic translators over professional human translators, if anything.
# Stack the three code columns (columns 90-92) into one long "Code" column,
# keeping the respondent ID (column 1). pivot_longer() replaces the superseded
# gather(); the result is converted back to a plain data frame as before.
codesA <- as.data.frame(
  pivot_longer(
    MTy[, c(1, 90:92)],
    cols = -ID,
    names_to = "CodeNumber",
    values_to = "Code"
  )
)
# Total coding instances, i.e. non-empty codes across the three columns.
codesA_n <- sum(!is.na(codesA$Code))
# Respondents with no first code.
# NOTE(review): presumably the source of the "Total responses" figure quoted in
# the caption below; it is not used elsewhere in this section -- confirm.
missing <- sum(is.na(MTy$Code1))
codesA_no_nas <- codesA[which(!is.na(codesA$Code)), ]
# Tally each code as a share of all coding instances, most frequent first.
codes_QA <- count(codesA_no_nas, Code)
codes_QA$pc <- round((codes_QA$n / sum(codes_QA$n)) * 100, 1)
codes_QA <- codes_QA[order(-codes_QA$n), ]
kable(
  codes_QA,
  row.names = FALSE,
  col.names = c(
    "Please say a few words about what would make you prefer automatic translators over professional human translators, if anything",
    "n",
    "%"
  ),
  caption = "Open Question A. Base: total coding instances (1520). Total responses: 901"
)
Please say a few words about what would make you prefer automatic translators over professional human translators, if anything | n | % |
---|---|---|
Usability | 442 | 29.1 |
Speed | 276 | 18.2 |
Cost | 262 | 17.2 |
Quality | 155 | 10.2 |
Use contexts | 117 | 7.7 |
Human v Machine | 105 | 6.9 |
Affect | 36 | 2.4 |
Undefined | 36 | 2.4 |
Language | 30 | 2.0 |
Platform | 18 | 1.2 |
Somatics (HCI) | 16 | 1.1 |
Message type/form | 15 | 1.0 |
Procedure | 12 | 0.8 |
# Open Question B: How would you describe the ideal automatic translator of
# the future?
# Stack the three code columns (columns 95-97) into one long "Code" column,
# keeping the respondent ID (column 1). pivot_longer() replaces the superseded
# gather(); the result is converted back to a plain data frame as before.
codesB <- as.data.frame(
  pivot_longer(
    MTy[, c(1, 95:97)],
    cols = -ID,
    names_to = "CodeNumber",
    values_to = "Code"
  )
)
# Total coding instances, i.e. non-empty codes across the three columns.
codesB_n <- sum(!is.na(codesB$Code))
# Respondents with no first code.
# NOTE(review): presumably the source of the "Total responses" figure quoted in
# the caption below; it is not used elsewhere in this section -- confirm.
missing <- sum(is.na(MTy$Code1.1))
codesB_no_nas <- codesB[which(!is.na(codesB$Code)), ]
# Tally each code as a share of all coding instances, most frequent first.
codes_QB <- count(codesB_no_nas, Code)
codes_QB$pc <- round((codes_QB$n / sum(codes_QB$n)) * 100, 1)
codes_QB <- codes_QB[order(-codes_QB$n), ]
kable(
  codes_QB,
  row.names = FALSE,
  col.names = c(
    "How would you describe the ideal automatic translator of the future?",
    "n",
    "%"
  ),
  # Caption typo fixed: "reponses" -> "responses".
  caption = "Open Question B. Base: total coding instances (1309). Total responses: 905"
)
How would you describe the ideal automatic translator of the future? | n | % |
---|---|---|
Quality | 412 | 31.5 |
Procedure | 154 | 11.8 |
Language | 136 | 10.4 |
Speed | 125 | 9.5 |
Message type/form | 116 | 8.9 |
Undefined | 90 | 6.9 |
Usability | 87 | 6.6 |
Platform | 46 | 3.5 |
Human v Machine | 39 | 3.0 |
Somatics (HCI) | 39 | 3.0 |
Cost | 29 | 2.2 |
Use contexts | 25 | 1.9 |
Affect | 11 | 0.8 |
# Open-ended questions graph
# One horizontal bar chart per open question: codes on the axis, ordered by
# their share of coding instances.
graphA <- ggplot(codes_QA, aes(x = reorder(Code, pc), y = pc)) +
  geom_col(fill = "#E69F00") +
  ylab("% coding instances") +
  xlab("") +
  ggtitle("A: Please say a few words about what \nwould make you prefer automatic \ntranslators over professional \nhuman translators, if anything.") +
  theme(
    plot.title = element_text(size = 9.5),
    axis.title = element_text(size = 9.5)
  ) +
  coord_flip()
graphB <- ggplot(codes_QB, aes(x = reorder(Code, pc), y = pc)) +
  geom_col(fill = "#56B4E9") +
  ylab("% coding instances") +
  xlab("") +
  ggtitle("B: How would you describe the ideal \nautomatic translator of the future?") +
  theme(
    plot.title = element_text(size = 9.5),
    axis.title = element_text(size = 9.5)
  ) +
  coord_flip()
# Show the two charts side by side.
plot_grid(graphA, graphB, align = "h", nrow = 1)