Analysis of multiple response

Question 1

Analysis of multiple response

r spss survey

BuckyOH · Feb 13, 2012 · Viewed 7.6k times · Source

Answer

Answer

Your function is actually far too complicated for what you need to do. I think a function like this should work and be more flexible.

multfreqtable = function(data, question.prefix) {
  # Find the columns with the questions
  a = grep(question.prefix, names(data))
  # Find the total number of responses
  b = sum(data[, a] != 0)
  # Find the totals for each question
  d = colSums(data[, a] != 0)
  # Find the number of respondents
  e = sum(rowSums(data[,a]) !=0)
  # d + b as a vector. This is your overfall frequency 
  f = as.numeric(c(d, b))
  data.frame(question = c(names(d), "Total"),
             freq = f,
             percent = (f/b)*100,
             percentofcases = (f/e)*100 )
}

Add another question to your example dataset:

set.seed(1); df1$q2a = sample(c(0, 1), 30, replace=T)
set.seed(2); df1$q2b = sample(c(0, 2), 30, replace=T)
set.seed(3); df1$q2c = sample(c(0, 3), 30, replace=T)

Make a table for "q1" responses:

> multfreqtable(df1, "q1")
  question freq   percent percentofcases
1      q1a   15  33.33333             60
2      q1b   15  33.33333             60
3      q1c   15  33.33333             60
4    Total   45 100.00000            180

Make a table for "q2" responses:

> multfreqtable(df1, "q2")
  question freq   percent percentofcases
1      q2a   14  31.11111       53.84615
2      q2b   13  28.88889       50.00000
3      q2c   18  40.00000       69.23077
4    Total   45 100.00000      173.07692

Tables for multiple questions

Here's a modified version of the function that allows you to create a list of tables for multiple questions at once:

multfreqtable = function(data, question.prefix) {
  z = length(question.prefix)
  temp = vector("list", z)

  for (i in 1:z) {
    a = grep(question.prefix[i], names(data))
    b = sum(data[, a] != 0)
    d = colSums(data[, a] != 0)
    e = sum(rowSums(data[,a]) !=0)
    f = as.numeric(c(d, b))
    temp[[i]] = data.frame(question = c(sub(question.prefix[i], 
                                            "", names(d)), "Total"),
                           freq = f,
                           percent = (f/b)*100,
                           percentofcases = (f/e)*100 )
    names(temp)[i] = question.prefix[i]
  }
  temp
}

Examples:

> multfreqtable(df1, "q1")
$q1
  question freq   percent percentofcases
1        a   15  33.33333             60
2        b   15  33.33333             60
3        c   15  33.33333             60
4    Total   45 100.00000            180

> test1 = multfreqtable(df1, c("q1", "q2"))
> test1
$q1
  question freq   percent percentofcases
1        a   15  33.33333             60
2        b   15  33.33333             60
3        c   15  33.33333             60
4    Total   45 100.00000            180

$q2
  question freq   percent percentofcases
1        a   14  31.11111       53.84615
2        b   13  28.88889       50.00000
3        c   18  40.00000       69.23077
4    Total   45 100.00000      173.07692

> test1$q1
  question freq   percent percentofcases
1        a   15  33.33333             60
2        b   15  33.33333             60
3        c   15  33.33333             60
4    Total   45 100.00000            180

Question 2

df1 <-
  data.frame(c("male", "female", "male"),
             c("1", "2", "3", "4", "5", "6"),
             seq(141, 170))

names(df1) = c("gender", "age", "height")

df1$age <- factor(
  df1$age,
  levels = c(1, 2, 3, 4, 5, 6),
  labels = c("16-24", "25-34", "35-44", "45-54", "55-64", "65+")
)

q1a = c(1, 0, 1, 0, 0, 1)
q1b = c(0, 0, 2, 2, 2, 0)
q1c = c(0, 0, 3, 3, 0, 3)
# 1,2 and 3 used to be compatible with existing datasets. 
# Could change all to 1 if necessary.

df2 <- data.frame(q1a = q1a, q1b = q1b, q1c = q1c)
df1 <- cbind(df1, df2)

rm(q1a, q1b, q1c, df2)

I am looking to replicate the analysis of multiple response questions from SPSS in R.

At the moment I am using this code:

#creating function for analysing questions with grouped data
multfreqtable <- function(a, b, c) {
  # number of respondents (for percent of cases)
  totrep = sum(a == 1 | b == 2 | c == 3)
  
  #creating frequency table
  table_a = data.frame("a", sum(a == 1))
  names(table_a) = c("question", "freq")
  table_b = data.frame("b", sum(b == 2))
  names(table_b) = c("question", "freq")
  table_c = data.frame("c", sum(c == 3))
  names(table_c) = c("question", "freq")
  table_question <- rbind(table_a, table_b, table_c)
  
  #remove individual question tables
  rm(table_a, table_b, table_c)
  
  #adding total
  total = as.data.frame("Total")
  totalsum = (sum(table_question$freq, na.rm = TRUE))
  totalrow = cbind(total, totalsum)
  names(totalrow) = c("question", "freq")
  table_question = rbind(table_question, totalrow)
  
  #adding percentage column to frequency table
  percentcalc = as.numeric(table_question$freq)
  percent = (percentcalc / totalsum) * 100
  table_question <- cbind(table_question, percent)
  
  #adding percent of cases column to frequency table
  poccalc = as.numeric(table_question$freq)
  percentofcases = (poccalc / totrep) * 100
  table_question <- cbind(table_question, percentofcases)
  
  #print percent of cases value
  total_respondents <<- data.frame(totrep)
  
  #remove all unnecessary data and values
  rm(
    total,
    totalsum,
    totalrow,
    b,
    c,
    percent,
    percentcalc,
    percentofcases,
    totrep,
    poccalc
  )
  
  return(table_question)
}

#calling function - must tie to data.frame using $ !!!
q1_frequency <- multfreqtable(df1$q1a, df1$q1b, df1$q1c)

#renaming percent of cases - This is very important while using current method
total_respondents_q1 <- total_respondents
rm(total_respondents)

Producing this table as a result:

Output table

I am looking for a more efficient method of doing this that ideally would not require the function to be edited if there were more or less multiple choice questions.

Analysis of multiple response

Answer

Tables for multiple questions

Related questions