An UpSet plot is an alternative to the
Venn diagram for visualizing more
than 3 sets. It is built in R
with the
UpSetR
package.
To visualize the intersection between more than 3 sets, the best option is to use an UpSet plot.
Here is an example provided by the UpSetR
package that
displays information concerning the banana genome.
The total size of each set is represented on the left barplot. Each intersection is represented by a column of the bottom plot, and its number of occurrences is shown on the top barplot.
Source: the UpSetR documentation
# Specific library
library(UpSetR)
# Dataset
input <- c(
M.acuminata = 759,
P.dactylifera = 769,
A.thaliana = 1187,
O.sativa = 1246,
S.bicolor = 827,
B.distachyon = 387,
"P.dactylifera&M.acuminata" = 467,
"O.sativa&M.acuminata" = 29,
"A.thaliana&O.sativa" = 6,
"S.bicolor&A.thaliana" = 9,
"O.sativa&P.dactylifera" = 32,
"S.bicolor&P.dactylifera" = 49,
"S.bicolor&M.acuminata" = 49,
"B.distachyon&O.sativa" = 547,
"S.bicolor&O.sativa" = 1151,
"B.distachyon&A.thaliana" = 10,
"B.distachyon&M.acuminata" = 9,
"B.distachyon&S.bicolor" = 402,
"M.acuminata&A.thaliana" = 155,
"A.thaliana&P.dactylifera" = 105,
"B.distachyon&P.dactylifera" = 25,
"S.bicolor&O.sativa&P.dactylifera" = 42,
"B.distachyon&O.sativa&P.dactylifera" = 12,
"S.bicolor&O.sativa&B.distachyon" = 2809,
"B.distachyon&O.sativa&A.thaliana" = 18,
"S.bicolor&O.sativa&A.thaliana" = 40,
"S.bicolor&B.distachyon&A.thaliana" = 14,
"O.sativa&B.distachyon&M.acuminata" = 28,
"S.bicolor&B.distachyon&M.acuminata" = 13,
"O.sativa&M.acuminata&P.dactylifera" = 35,
"M.acuminata&S.bicolor&A.thaliana" = 21,
"B.distachyon&M.acuminata&A.thaliana" = 7,
"O.sativa&M.acuminata&A.thaliana" = 13,
"M.acuminata&P.dactylifera&A.thaliana" = 206,
"P.dactylifera&A.thaliana&S.bicolor" = 4,
"O.sativa&A.thaliana&P.dactylifera" = 6,
"S.bicolor&O.sativa&M.acuminata" = 64,
"S.bicolor&M.acuminata&P.dactylifera" = 19,
"B.distachyon&A.thaliana&P.dactylifera" = 3,
"B.distachyon&M.acuminata&P.dactylifera" = 12,
"B.distachyon&S.bicolor&P.dactylifera" = 23,
"M.acuminata&B.distachyon&S.bicolor&A.thaliana" = 54,
"P.dactylifera&S.bicolor&O.sativa&M.acuminata" = 62,
"B.distachyon&O.sativa&M.acuminata&P.dactylifera" = 18,
"S.bicolor&B.distachyon&O.sativa&A.thaliana" = 206,
"B.distachyon&M.acuminata&O.sativa&A.thaliana" = 29,
"O.sativa&M.acuminata&A.thaliana&S.bicolor" = 71,
"M.acuminata&O.sativa&P.dactylifera&A.thaliana" = 28,
"B.distachyon&M.acuminata&O.sativa&A.thaliana" = 7,
"B.distachyon&S.bicolor&P.dactylifera&A.thaliana" = 11,
"B.distachyon&O.sativa&P.dactylifera&A.thaliana" = 5,
"A.thaliana&P.dactylifera&S.bicolor&O.sativa" = 21,
"M.acuminata&S.bicolor&P.dactylifera&A.thaliana" = 23,
"M.acuminata&B.distachyon&S.bicolor&P.dactylifera" = 24,
"M.acuminata&O.sativa&S.bicolor&B.distachyon" = 368,
"P.dactylifera&B.distachyon&S.bicolor&O.sativa" = 190,
"P.dactylifera&B.distachyon&S.bicolor&O.sativa&A.thaliana" = 258,
"P.dactylifera&M.acuminata&S.bicolor&B.distachyon&O.sativa" = 685,
"M.acuminata&S.bicolor&B.distachyon&O.sativa&A.thaliana" = 1458,
"S.bicolor&M.acuminata&P.dactylifera&O.sativa&A.thaliana" = 149,
"B.distachyon&M.acuminata&P.dactylifera&O.sativa&A.thaliana" = 80,
"M.acuminata&S.bicolor&B.distachyon&P.dactylifera&A.thaliana" = 113,
"M.acuminata&S.bicolor&B.distachyon&P.dactylifera&O.sativa&A.thaliana" = 7674
)
# Plot
upset(fromExpression(input),
nintersects = 40,
nsets = 6,
order.by = "freq",
decreasing = T,
mb.ratio = c(0.6, 0.4),
number.angles = 0,
text.scale = 1.1,
point.size = 2.8,
line.size = 1
)