Importing packages and survey data

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(cowplot)
library(forcats)
library(knitr)

# Load the survey export; blank cells are read in as NA.
survey <- read.csv(file = "mt_uk_survey_data.csv", na.strings = "")

Demographics

#education
colnames(survey)[3] <- "education"
# Turn the 28 missing responses into an explicit "(Missing)" level so they are counted.
survey$education <- fct_explicit_na(survey$education)
edu <- count(survey, education)
edu$pc <- round((edu$n / sum(edu$n)) * 100, 1)
# Horizontal bar chart of counts, each bar labelled with its percentage share.
ed <- ggplot(edu, aes(x = reorder(education, n), y = n)) +
  geom_col(width = 0.8, fill = "#E69F00") +
  ylim(0, 515) +
  geom_text(aes(label = paste0(pc, "%")), hjust = 0, size = 3.2) +
  ylab("Count") +
  xlab("") +
  coord_flip() +
  ggtitle("Education") +
  theme(plot.title = element_text(size = 9))

#employment
# Replaces the 12 missing-data cells (coded 0) with "(Missing)". The pattern is
# anchored so that only cells that are exactly "0" are rewritten; an unanchored
# "0" would also rewrite any zero digit inside a real answer.
survey$employment_status <- gsub("^0$", "(Missing)", survey$employment_status)
# Replaces the 3 cells with expired data with "(Missing)".
survey$employment_status <- gsub("DATA EXPIRED", "(Missing)", survey$employment_status)
emp <- count(survey, employment_status)
emp$pc <- round((emp$n / sum(emp$n)) * 100, 1)
em <- ggplot(emp, aes(x = reorder(employment_status, n), y = n)) +
  geom_col(width = 0.8, fill = "#56B4E9") +
  # `limits` is spelled out in full rather than relying on partial argument matching.
  scale_y_continuous(limits = c(0, 570), breaks = c(0, 100, 200, 300, 400, 500)) +
  geom_text(aes(label = paste0(pc, "%")), hjust = 0, size = 3.2) +
  ylab("Count") +
  xlab("") +
  coord_flip() +
  ggtitle("Employment") +
  theme(plot.title = element_text(size = 9))

# Stack the two demographic charts vertically and print the combined figure.
demo_plot <- plot_grid(ed, em, align = "v", ncol = 1)
demo_plot

#students
# Replaces the 6 missing-data cells (coded 0) with "(Missing)". The pattern is
# anchored so that only cells that are exactly "0" are rewritten, never a zero
# digit that happens to occur inside another value.
survey$student <- gsub("^0$", "(Missing)", survey$student)
students <- count(survey, student)
students$pc <- round((students$n / sum(students$n)) * 100, 1)
# Largest group first.
students <- arrange(students, desc(n))
kable(students, row.names = FALSE, col.names = c("Student", "n", "%")) #as per Prolific details
Student n %
No 1052 87.7
Yes 142 11.8
(Missing) 6 0.5
#native language
colnames(survey)[4] <- "native_lang"
survey$native_lang <- fct_explicit_na(survey$native_lang) #Makes 7 cells with missing data explicit
survey$native_lang <- gsub("HE Administration", "(Missing)", survey$native_lang) #treats one invalid answer (HE Administration) as a case of "(Missing)"

# The blank and invalid answers were recoded to "(Missing)" above, so no rows
# need to be dropped here: every respondent stays in the base and the recoded
# answers are reported in the "(Missing)" row of the table.
native.language <- survey

# Spellings accepted as "English"; includes respondents who said 'British' was
# their native language and common misspellings.
english_pattern <- "English|British|Engish|Engligh|englsh|Enlish|Ennglish|\\<En\\>"
is_english <- grepl(english_pattern, native.language$native_lang, ignore.case = TRUE)
# fixed = TRUE matches the literal "(Missing)" marker; as a regular expression
# the parentheses would be a group and the pattern would match any "Missing".
is_missing <- grepl("(Missing)", native.language$native_lang, fixed = TRUE)

en.native <- sum(is_english)
en.native.pc <- round((en.native / nrow(native.language)) * 100, 1)

# Neither an English spelling nor a missing answer.
non.en.native <- sum(!is_english & !is_missing)
non.en.native.pc <- round((non.en.native / nrow(native.language)) * 100, 1)

missing <- sum(is_missing)
missing.pc <- round((missing / nrow(native.language)) * 100, 1)

Native_speaker_of_English <- c("Yes", "No", "(Missing)")
n <- c(en.native, non.en.native, missing)
pc <- c(en.native.pc, non.en.native.pc, missing.pc)
lang.native <- data.frame(Native_speaker_of_English, n, pc)
kable(lang.native, row.names = FALSE, col.names = c("Native speaker of English?", "n", "%"))
Native speaker of English? n %
Yes 1132 94.3
No 60 5.0
(Missing) 8 0.7
#other language (at least basic knowledge)
names(survey)[5] <- "language_yn"
# The 8 missing responses become an explicit "(Missing)" level.
survey$language_yn <- fct_explicit_na(survey$language_yn)
# Count per answer, add the percentage share, largest group first.
language <- survey %>%
  count(language_yn) %>%
  mutate(pc = round((n / sum(n)) * 100, 1)) %>%
  arrange(desc(n))
kable(
  language,
  row.names = FALSE,
  col.names = c("Are there other languages you know at least a little (for example, enough to read a restaurant menu)?", "n", "%")
)
Are there other languages you know at least a little (for example, enough to read a restaurant menu)? n %
Yes 724 60.3
No 468 39.0
(Missing) 8 0.7
#French
names(survey)[6] <- "other_languages"
# Flag free-text answers that mention French, including common misspellings
# and the abbreviation "Fr"; blank (NA) answers never match.
mentions_french <- grepl(
  "French|Frech|Franch|\\<Fr\\>",
  survey$other_languages,
  ignore.case = TRUE
)
lang.nonnative.fr <- sum(mentions_french)
# Percentage base is the whole sample.
lang.nonnative.fr.pc <- round((lang.nonnative.fr / nrow(survey)) * 100, 1)

n <- lang.nonnative.fr
pc <- lang.nonnative.fr.pc
lang.French <- data.frame(n, pc)
kable(
  lang.French,
  row.names = FALSE,
  col.names = c("Respondents with some French", "%")
)
Respondents with some French %
457 38.1

#Closed-ended results ## Used MT before

names(survey)[8] <- "MTuse_yn"

# One respondent selected 'yes' only because of a technical error (see their
# free-text answer in Other.3); recode that answer to 'no' (0).
MTuse <- survey
freeze_error <- MTuse$Other.3 %in% "A tried to answer NO but it kept freezing"
MTuse$MTuse_yn[freeze_error] <- 0

# Count yes/no answers, add the percentage share, and label 0 as "No" and
# anything else as "Yes".
MTyn <- MTuse %>%
  count(MTuse_yn) %>%
  mutate(
    pc = round((n / sum(n)) * 100, 1),
    MTuse_yn = ifelse(MTuse_yn == 0, "No", "Yes")
  )

kable(
  MTyn,
  row.names = FALSE,
  col.names = c("Have you used automatic translators before?", "n", "%")
)
Have you used automatic translators before? n %
No 289 24.1
Yes 911 75.9
MTy <- MTuse[which(MTuse$MTuse_yn != 0), ] #excludes 289 entries by those who had not used MT before
MTy_n <- nrow(MTy) #911

#MT systems and/or interfaces

#The different options reflect how users normally came into contact with the tools. The underlying systems will in some cases be the same.

# Display label -> survey column, in questionnaire order. Keeping the label and
# its column side by side means the labels, counts and percentages cannot drift
# out of alignment.
system_columns <- c(
  "Google Translate" = "Google.Translate",
  "Microsoft Translator" = "Bing.or.Microsoft.Translator",
  "DeepL" = "DeepL",
  "iTranslate" = "iTranslate",
  "Facebook" = "The.default.automatic.translator.in.Facebook",
  "Twitter" = "The.default.automatic.translator.in.Twitter",
  "Microsoft Word" = "The.default.automatic.translator.in.Microsoft.Word",
  "Google Chrome" = "The.default.automatic.translator.in.Google.Chrome",
  "Internet Explorer" = "The.default.automatic.translator.in.Internet.Explorer",
  "TranslateAll" = "TranslateAll",
  "Skype" = "Skype.Translator",
  "Reverso" = "Reverso",
  "Systran" = "Systran",
  "PROMT" = "PROMT",
  "Freetranslation.com" = "FreeTranslation.com",
  "Yandex" = "Yandex",
  "Babelfish" = "BabelFish",
  "Yahoo/Babel Fish" = "Yahoo..Babel.Fish.or.Altavista.Translation.Service",
  "Google Assistant" = "Google.Assistant",
  "Other" = "Other.3"
)

systems.interfaces <- names(system_columns)
# A respondent counts towards a system when the corresponding cell is not NA.
systems.counts <- vapply(
  system_columns,
  function(column) sum(!is.na(MTy[[column]])),
  integer(1),
  USE.NAMES = FALSE
)
# Percentage base: respondents who have used MT before.
systems.pc <- round((systems.counts / MTy_n) * 100, 1)

systems_df <- data.frame(systems.interfaces, systems.counts, systems.pc)
# Most frequently selected first.
systems_df <- systems_df[order(-systems.counts), ]
kable(systems_df, row.names = FALSE, col.names = c("Systems/interfaces", "n", "%"))
Systems/interfaces n %
Google Translate 867 95.2
Google Chrome 227 24.9
Facebook 207 22.7
Babelfish 122 13.4
Twitter 90 9.9
Google Assistant 62 6.8
Microsoft Translator 46 5.0
Internet Explorer 29 3.2
Microsoft Word 21 2.3
iTranslate 18 2.0
Yahoo/Babel Fish 18 2.0
Other 11 1.2
Freetranslation.com 8 0.9
Reverso 7 0.8
DeepL 5 0.5
Skype 5 0.5
Yandex 5 0.5
Systran 3 0.3
PROMT 2 0.2
TranslateAll 0 0.0

##Use context

# Number of MT users who ticked each context; unanswered (NA) cells are ignored.
leisure <- sum(MTy$Leisure.1 == "Leisure", na.rm = TRUE)
study <- sum(MTy$Study.1 == "Study", na.rm = TRUE)
work <- sum(MTy$Work.1 == "Work", na.rm = TRUE)
# "Other" counts any non-blank entry in the Other.4 column.
other <- sum(!is.na(MTy$Other.4))
# Respondents who left every context option blank.
no_context_given <- is.na(MTy$Leisure.1) &
  is.na(MTy$Study.1) &
  is.na(MTy$Work.1) &
  is.na(MTy$Other.4)
missing.context <- sum(no_context_given)

contexts <- c("Leisure", "Study", "Work", "Other", "No response")
contexts_counts <- c(leisure, study, work, other, missing.context)
# Percentage base: respondents who have used MT before.
contexts_pc <- round((contexts_counts / MTy_n) * 100, 1)

contexts_df <- data.frame(contexts, contexts_counts, contexts_pc)
contexts_df <- contexts_df[order(-contexts_counts), ]
#multiple choice allowed, hence pc > 100%
kable(
  contexts_df,
  row.names = FALSE,
  col.names = c("Please select why you used automatic translators", "n", "%")
)
Please select why you used automatic translators n %
Leisure 730 80.1
Work 253 27.8
Study 208 22.8
Other 54 5.9
No response 13 1.4

##Location of use

# Number of MT users who ticked each location; unanswered (NA) cells are ignored.
abroad <- sum(MTy$Abroad.1 == "Abroad", na.rm = TRUE)
uk <- sum(MTy$In.the.UK.1 == "In the UK", na.rm = TRUE)
# Respondents who left both location options blank.
missing.loc <- sum(is.na(MTy$Abroad.1) & is.na(MTy$In.the.UK.1))

location <- c("Abroad", "UK", "No response")
location_counts <- c(abroad, uk, missing.loc)
# Percentage base: respondents who have used MT before.
location_pc <- round((location_counts / MTy_n) * 100, 1)

location_df <- data.frame(location, location_counts, location_pc)
location_df <- location_df[order(-location_counts), ]
#multiple choice allowed, hence pc > 100%
kable(
  location_df,
  row.names = FALSE,
  col.names = c("MT use location", "n", "%")
)
MT use location n %
UK 832 91.3
Abroad 295 32.4
No response 5 0.5
# Device question: one column per option, addressed by position (71 to 76).
# NOTE(review): positional indices assume the CSV column order never changes -
# confirm, or switch to column names.
desk.laptop <- sum(!is.na(MTy[[71]]))
mobile <- sum(!is.na(MTy[[72]]))
smartwatch <- sum(!is.na(MTy[[73]]))
speaker <- sum(!is.na(MTy[[74]]))
tablet <- sum(!is.na(MTy[[75]]))
other.device <- sum(!is.na(MTy[[76]]))
# Respondents with no answer in any of the six device columns.
device_answered <- !is.na(MTy[71:76])
missing.device <- sum(rowSums(device_answered) == 0)

devices_names <- c(
  "Desktop or laptop computer",
  "Mobile phone",
  "Smartwatch",
  "Smart speaker or smart home device",
  "Tablet",
  "Other",
  "No response"
)

devices_counts <- c(
  desk.laptop, mobile, smartwatch, speaker, tablet, other.device, missing.device
)

# Percentage base: respondents who have used MT before.
devices_pc <- round((devices_counts / MTy_n) * 100, 1)

devices_df <- data.frame(devices_names, devices_counts, devices_pc)
devices_df <- devices_df[order(-devices_counts), ]
#multiple choice allowed, hence pc > 100%
kable(
  devices_df,
  row.names = FALSE,
  col.names = c("On what type of device have you used automatic translators?", "n", "%")
)
On what type of device have you used automatic translators? n %
Desktop or laptop computer 721 79.1
Mobile phone 555 60.9
Tablet 182 20.0
Smart speaker or smart home device 14 1.5
Smartwatch 5 0.5
No response 4 0.4
Other 1 0.1

##Use purpose and procedure

# Labels for the use-situation options, in the same order as columns 58 to 70.
# NOTE(review): positional indices assume the CSV column order never changes -
# confirm, or switch to column names.
uses_names <- c("I needed to read a text or document in a different language", "I needed to send an e-mail or a message",
"I needed urgent help or was in a situation I consider serious  e.g.  in hospital or at a police station",
"I used it for play or out of curiosity",
"I wanted to buy or sell something",
"I was browsing the internet",
"I was on social media or on an online forum",
"I was speaking out loud to someone online in real time",
"I was texting online in real time",
"I was trying to communicate with someone in person by typing messages in the same physical space",
"I was trying to learn a language or the meaning of something in a different language",
"I was trying to speak out loud with someone in person in the same physical space",
"Other")

# One pass over the option columns: a respondent counts towards an option when
# the corresponding cell is not NA.
uses_counts <- vapply(
  MTy[58:70],
  function(column) sum(!is.na(column)),
  integer(1),
  USE.NAMES = FALSE
)
# Fail loudly if the labels and the column range ever get out of step.
stopifnot(length(uses_names) == length(uses_counts))

# Percentage base: respondents who have used MT before.
uses_pc <- round((uses_counts / MTy_n) * 100, 1)

uses_df <- data.frame(uses_names, uses_counts, uses_pc)
# Most frequently selected first.
uses_df <- uses_df[order(-uses_counts), ]
kable(uses_df, row.names = FALSE, col.names = c("How would you describe the situation(s) where you used automatic translators?", "n", "%")) #multiple choice allowed, hence pc > 100%
How would you describe the situation(s) where you used automatic translators? n %
I needed to read a text or document in a different language 620 68.1
I used it for play or out of curiosity 369 40.5
I was browsing the internet 341 37.4
I was trying to learn a language or the meaning of something in a different language 259 28.4
I was on social media or on an online forum 256 28.1
I needed to send an e-mail or a message 212 23.3
I was trying to communicate with someone in person by typing messages in the same physical space 111 12.2
I wanted to buy or sell something 109 12.0
I was trying to speak out loud with someone in person in the same physical space 73 8.0
I was texting online in real time 66 7.2
I was speaking out loud to someone online in real time 38 4.2
Other 32 3.5
I needed urgent help or was in a situation I consider serious e.g. in hospital or at a police station 19 2.1

##Perceptions of Accuracy

names(MTy)[77] <- "accuracy"
# Unanswered cells become an explicit "(Missing)" level so they are counted.
MTy$accuracy <- fct_explicit_na(MTy$accuracy)
# Count per answer and add the percentage share.
accuracy <- MTy %>%
  count(accuracy) %>%
  mutate(pc = round((n / sum(n)) * 100, 1))
# Most frequent answer first.
accuracy <- accuracy[order(-accuracy$n), ]
kable(
  accuracy,
  row.names = FALSE,
  col.names = c("Based on your overall experience, would you say automatic translators are often:", "n", "%")
)
Based on your overall experience, would you say automatic translators are often: n %
Accurate 617 67.7
Inaccurate 126 13.8
Very accurate 114 12.5
I don’t know 37 4.1
Very inaccurate 12 1.3
(Missing) 5 0.5

##Perceptions of Risk

names(MTy)[78] <- "risk"
# Count per answer and add the percentage share.
risk <- MTy %>%
  count(risk) %>%
  mutate(pc = round((n / sum(n)) * 100, 1))
# Most frequent answer first.
risk <- risk[order(-risk$n), ]

# Label 0 as "No" and anything else as "Yes".
risk$risk <- ifelse(risk$risk == 0, "No", "Yes")

kable(
  risk,
  row.names = FALSE,
  col.names = c("Based on your overall experience, did you ever feel using automatic translators was a risk?", "n", "%")
)
Based on your overall experience, did you ever feel using automatic translators was a risk? n %
No 700 76.8
Yes 211 23.2

##Perceived severity of risk

names(MTy)[79] <- "risk_rating"
# Count per rating, dropping the row for respondents with no rating (NA), so
# that percentages are based on those who gave a rating.
risk_rating_df <- MTy %>%
  count(risk_rating) %>%
  na.exclude()
risk_rating_df$pc <- round((risk_rating_df$n / sum(risk_rating_df$n)) * 100, 1)
# Most frequent rating first.
risk_rating_df <- risk_rating_df[order(-risk_rating_df$n), ]
kable(
  risk_rating_df,
  row.names = FALSE,
  col.names = c("How would you rate this risk", "n", "%"),
  caption = "1 = very low risk; 5 = very high risk"
)
1 = very low risk; 5 = very high risk
How would you rate this risk n %
3 109 51.7
2 54 25.6
4 34 16.1
1 10 4.7
5 4 1.9

##Perceived type of risk

# Risk-type question: one column per option, addressed by position (80 to 86).
# A respondent counts towards an option when the corresponding cell is not NA.
# NOTE(review): positional indices assume the CSV column order never changes -
# confirm, or switch to column names.
personal_rep <- sum(!is.na(MTy[[80]]))
studies <- sum(!is.na(MTy[[81]]))
financial <- sum(!is.na(MTy[[82]]))
legal <- sum(!is.na(MTy[[83]]))
medical <- sum(!is.na(MTy[[84]]))
professional <- sum(!is.na(MTy[[85]]))
other <- sum(!is.na(MTy[[86]]))

risk_types <- c(
  "A risk to my personal reputation",
  "A risk to my studies",
  "Financial",
  "Legal",
  "Medical",
  "Professional",
  "Other"
)

risk_counts <- c(
  personal_rep, studies, financial, legal, medical, professional, other
)

# Percentage base: respondents who gave a risk rating.
risk_n <- sum(risk_rating_df$n)

risk_pc <- round((risk_counts / risk_n) * 100, 1)

risktypes_df <- data.frame(risk_types, risk_counts, risk_pc)
# Most frequently selected first.
risktypes_df <- risktypes_df[order(-risk_counts), ]
kable(
  risktypes_df,
  row.names = FALSE,
  col.names = c("How would you describe the type of risk?", "n", "%")
)
How would you describe the type of risk? n %
A risk to my personal reputation 86 40.8
A risk to my studies 69 32.7
Other 54 25.6
Professional 40 19.0
Financial 18 8.5
Medical 11 5.2
Legal 10 4.7

##Use decision

names(MTy)[87] <- "decision"
# Unanswered cells become an explicit "(Missing)" level so they are counted.
MTy$decision <- fct_explicit_na(MTy$decision)
# Count per answer and add the percentage share.
decision <- MTy %>%
  count(decision) %>%
  mutate(pc = round((n / sum(n)) * 100, 1))
# Most frequent answer first.
decision <- decision[order(-decision$n), ]
kable(
  decision,
  row.names = FALSE,
  col.names = c("Would you say you used automatic translators:", "n", "%")
)
Would you say you used automatic translators: n %
Because it served my purpose well and I wanted to use it 698 76.6
For lack of a better alternative 208 22.8
(Missing) 5 0.5

##Satisfaction

names(MTy)[88] <- "satisfaction"
# Unanswered cells become an explicit "(Missing)" level so they are counted.
MTy$satisfaction <- fct_explicit_na(MTy$satisfaction)
# Count per answer and add the percentage share.
satisfaction <- MTy %>%
  count(satisfaction) %>%
  mutate(pc = round((n / sum(n)) * 100, 1))
# Most frequent answer first.
satisfaction <- satisfaction[order(-satisfaction$n), ]
kable(
  satisfaction,
  row.names = FALSE,
  col.names = c("Please rate how satisfied you were with the automatic translator(s) you used:", "n", "%")
)
Please rate how satisfied you were with the automatic translator(s) you used: n %
Satisfied 572 62.8
Very satisfied 274 30.1
Dissatisfied 45 4.9
I don’t know 10 1.1
Very dissatisfied 8 0.9
(Missing) 2 0.2

Open-ended coding agreement (fuzzyKappa.exe run on Windows)

We set aside a random sample of 100 survey entries for all four coders to code independently. The entries included responses to two questions, so 200 coding units in total. We exclude from the agreement calculation 56 empty responses and 4 responses that one coder coded by accident prior to the independent coding. A resulting sample of 140 coding units across the two questions is used for the agreement check.

There were four .txt files with the independent coding (coders 1, 2, 3, and 4).

We compute all between-coder calculations of fuzzyKappa with the fuzzyKappa.bat file with the following arguments:

fuzzyKappa.exe file1.txt file2.txt -f 1 1 1 -norm MIN -ci 1000

###Output

coding_4 v coding_3

Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.757142857143 Expected agreement = 0.146683673469 Kappa = 0.715396113602 Bootstrap CI at 90: ( 0.645930242772 , 0.780966410031 ) Bootstrap CI at 95: ( 0.63324502706 , 0.793651625743 ) Bootstrap CI at 99: ( 0.607874595636 , 0.819022057167 )

Fuzzy Kappa: Observed agreement = 0.745238095238 Expected agreement = 0.156913265306 Kappa = 0.697822425288 Bootstrap CI at 90: ( 0.643114094777 , 0.751149657628 ) Bootstrap CI at 95: ( 0.632965299479 , 0.761298452926 ) Bootstrap CI at 99: ( 0.612667708883 , 0.781596043522 )

coding_4 v coding_1

Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.764285714286 Expected agreement = 0.154693877551 Kappa = 0.721149203283 Bootstrap CI at 90: ( 0.651697791293 , 0.786854309844 ) Bootstrap CI at 95: ( 0.639001269854 , 0.799550831283 ) Bootstrap CI at 99: ( 0.613608226975 , 0.824943874162 )

Fuzzy Kappa: Observed agreement = 0.742857142857 Expected agreement = 0.172465986395 Kappa = 0.689266117265 Bootstrap CI at 90: ( 0.630680069063 , 0.746392491872 ) Bootstrap CI at 95: ( 0.619810114193 , 0.757262446741 ) Bootstrap CI at 99: ( 0.598070204453 , 0.779002356481 )

coding_3 v coding_1

Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.807142857143 Expected agreement = 0.144132653061 Kappa = 0.774664679583 #highest crisp Kappa Bootstrap CI at 90: ( 0.71084667378 , 0.834906635056 ) Bootstrap CI at 95: ( 0.699192556206 , 0.84656075263 ) Bootstrap CI at 99: ( 0.675884321057 , 0.869868987779 )

Fuzzy Kappa: Observed agreement = 0.785714285714 Expected agreement = 0.160807823129 Kappa = 0.744652392871 Bootstrap CI at 90: ( 0.694713578252 , 0.792806170592 ) Bootstrap CI at 95: ( 0.685498819578 , 0.802020929267 ) Bootstrap CI at 99: ( 0.667069302229 , 0.820450446615 )

coding_2 v coding_1

Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.785714285714 Expected agreement = 0.158979591837 Kappa = 0.745207473914 Bootstrap CI at 90: ( 0.678192066067 , 0.812635257604 ) Bootstrap CI at 95: ( 0.665562554135 , 0.825264769536 ) Bootstrap CI at 99: ( 0.64030353027 , 0.850523793401 )

Fuzzy Kappa: Observed agreement = 0.797619047619 Expected agreement = 0.185323129252 Kappa = 0.751581320585 ##Highest level of agreement Bootstrap CI at 90: ( 0.698250590846 , 0.798034144329 ) Bootstrap CI at 95: ( 0.688876984307 , 0.807407750868 ) Bootstrap CI at 99: ( 0.670129771229 , 0.826154963947 )

coding_2 v coding_3

Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.707142857143 Expected agreement = 0.146581632653 Kappa = 0.656842231123 #lowest crisp Kappa Bootstrap CI at 90: ( 0.583668451584 , 0.726886844827 ) Bootstrap CI at 95: ( 0.570214602522 , 0.740340693889 ) Bootstrap CI at 99: ( 0.543306904398 , 0.767248392014 )

Fuzzy Kappa: Observed agreement = 0.704761904762 Expected agreement = 0.161556122449 Kappa = 0.647873753816 ##Lowest level of agreement Bootstrap CI at 90: ( 0.589658061554 , 0.704238827568 ) Bootstrap CI at 95: ( 0.578894413837 , 0.715002475284 ) Bootstrap CI at 99: ( 0.557367118404 , 0.736529770717 )

coding_2 v coding_4

Crisp Kappa (based on the first choice of the raters): Observed agreement = 0.742857142857 Expected agreement = 0.160102040816 Kappa = 0.693840359616 Bootstrap CI at 90: ( 0.62359611456 , 0.762761519359 ) Bootstrap CI at 95: ( 0.610523000776 , 0.775834633143 ) Bootstrap CI at 99: ( 0.584376773207 , 0.801980860712 )

Fuzzy Kappa: Observed agreement = 0.741666666667 Expected agreement = 0.178273809524 Kappa = 0.685621151757 Bootstrap CI at 90: ( 0.626711496543 , 0.740913339549 ) Bootstrap CI at 95: ( 0.615983444625 , 0.751641391468 ) Bootstrap CI at 99: ( 0.594527340787 , 0.773097495305 )

##Open-ended results

#Open Question A: Please say a few words about what would make you prefer automatic translators over professional human translators, if anything.

# Stack the three code columns (90 to 92) into one long Code column, keyed by
# respondent ID (column 1).
#warning as not all codes (levels) are used in all three columns
codesA <- gather(MTy[, c(1, 90:92)], CodeNumber, Code, -ID)

# Total number of codes assigned across the three columns.
codesA_n <- sum(!is.na(codesA[[3]]))

# Respondents with no first code.
missing <- sum(is.na(MTy$Code1))

# Keep only rows that actually carry a code.
codesA_no_nas <- codesA[!is.na(codesA$Code), ]

# Count per code and add its share of all coding instances.
codes_QA <- codesA_no_nas %>%
  count(Code) %>%
  mutate(pc = round((n / sum(n)) * 100, 1))

# Most frequent code first.
codes_QA <- codes_QA[order(-codes_QA$n), ]
# NOTE(review): the figures in the caption are hard-coded; codesA_n and missing
# are computed above but not used - presumably they are where the figures came
# from. Confirm before re-running on changed data.
kable(
  codes_QA,
  row.names = FALSE,
  col.names = c("Please say a few words about what would make you prefer automatic translators over professional human translators, if anything", "n", "%"),
  caption = "Open Question A. Base: total coding instances (1520). Total responses: 901"
)
Open Question A. Base: total coding instances (1520). Total responses: 901
Please say a few words about what would make you prefer automatic translators over professional human translators, if anything n %
Usability 442 29.1
Speed 276 18.2
Cost 262 17.2
Quality 155 10.2
Use contexts 117 7.7
Human v Machine 105 6.9
Affect 36 2.4
Undefined 36 2.4
Language 30 2.0
Platform 18 1.2
Somatics (HCI) 16 1.1
Message type/form 15 1.0
Procedure 12 0.8
#Open Question B: How would you describe the ideal automatic translator of the future?

# Stack the three code columns (95 to 97) into one long Code column, keyed by
# respondent ID (column 1).
codesB <- gather(MTy[,c(1,95:97)], CodeNumber, Code, -ID) #warning as not all codes (levels) are used in all three columns

# Total number of codes assigned across the three columns.
codesB_n <- sum(!is.na(codesB[3]))

# Respondents with no first code.
missing <- sum(is.na(MTy$Code1.1))

# Keep only rows that actually carry a code.
codesB_no_nas <- codesB[ which(!is.na(codesB$Code)), ]

# Count per code and add its share of all coding instances.
codes_QB <- count(codesB_no_nas, Code)
codes_QB$pc <- round((codes_QB$n/sum(codes_QB$n))*100,1)
# Most frequent code first.
codes_QB <- codes_QB[order(-codes_QB$n),]
# Caption typo fixed ("reponses" -> "responses").
# NOTE(review): the figures in the caption are hard-coded; codesB_n and missing
# are computed above but not used - presumably they are where the figures came
# from. Confirm before re-running on changed data.
kable(codes_QB, row.names=FALSE, col.names=c("How would you describe the ideal automatic translator of the future?", "n", "%"), caption ="Open Question B. Base: total coding instances (1309). Total responses: 905")
Open Question B. Base: total coding instances (1309). Total responses: 905
How would you describe the ideal automatic translator of the future? n %
Quality 412 31.5
Procedure 154 11.8
Language 136 10.4
Speed 125 9.5
Message type/form 116 8.9
Undefined 90 6.9
Usability 87 6.6
Platform 46 3.5
Human v Machine 39 3.0
Somatics (HCI) 39 3.0
Cost 29 2.2
Use contexts 25 1.9
Affect 11 0.8

#Open-ended questions graph

# Horizontal bars of each code's share of coding instances, largest at the top.
graphA <- ggplot(codes_QA, aes(x = reorder(Code, pc), y = pc)) +
  geom_col(fill = "#E69F00") +
  ylab("% coding instances") +
  xlab("") +
  ggtitle("A: Please say a few words about what \nwould make you prefer automatic \ntranslators over professional \nhuman translators, if anything.") +
  theme(
    plot.title = element_text(size = 9.5),
    axis.title = element_text(size = 9.5)
  ) +
  coord_flip()

graphB <- ggplot(codes_QB, aes(x = reorder(Code, pc), y = pc)) +
  geom_col(fill = "#56B4E9") +
  ylab("% coding instances") +
  xlab("") +
  ggtitle("B: How would you describe the ideal \nautomatic translator of the future?") +
  theme(
    plot.title = element_text(size = 9.5),
    axis.title = element_text(size = 9.5)
  ) +
  coord_flip()

# Show the two charts side by side.
plot_grid(graphA, graphB, align = "h", nrow = 1)