Pair Plots

Published

June 19, 2024

Display code

pacman::p_load(dplyr,purrr,tidyr,ggplot2, here, patchwork, 
  conflicted,stringr, gt, knitr, kableExtra, 
  lubridate,ggh4x,jsonlite,httr, openai)
walk(c("dplyr"), conflict_prefer_all, quiet = TRUE)
options(digits=2, scipen=999, dplyr.summarise.inform=FALSE)

#walk(c("fun_plot"), ~ source(here::here(paste0("R/", .x, ".R"))))

mc24_proto <- read.csv(here("Stimulii","mc24_prototypes.csv")) |> mutate(set=paste0(sbjCode,"_",condit)) 
sbj_cat <- read.csv(here("data","mc24_sbj_cat.csv"))

dfiles <- list(path=list.files(here::here("data/dotSim_data"),full.names=TRUE))

d <- map_dfr(dfiles$path, ~read.csv(.x))

d <- map_dfr(dfiles$path, ~{read.csv(.x) |> 
    mutate(sfile=tools::file_path_sans_ext(basename(.x)))}) |> 
  select(-trial_index, -internal_node_id,-trial_type) |>
   mutate(set = paste(str_extract(item_label_1, "^\\d+"),
                     str_extract(item_label_1, "[a-z]+"), sep = "_")) |>
  mutate(pair_label = paste0(item_label_1,"_",item_label_2)) |>
  relocate(sbjCode,date,set,pair_label,trial,item_label_1,item_label_2,response,rt)

setCounts <- d |> 
  pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> 
  group_by(set) |> summarise(n=n_distinct(sbjCode),resp=mean(response),sd=sd(response)) |> arrange(desc(n))

# length(unique(mc_proto$set)) # 304
setCounts2 <- mc24_proto |> group_by(set) |> 
  slice_head(n=1) |> 
  select(id,file,set) |> 
  left_join(setCounts,by="set") |> 
  mutate(n = ifelse(is.na(n), 0, n), .groups="drop") |> 
  arrange(n) |> ungroup()

pairCounts <- d |> 
  group_by(pair_label,set) |> 
  summarise(n=n(),mean_resp=mean(response),sd=sd(response)) |> 
  arrange(mean_resp) |> # arrange(desc(n)) |> 
  ungroup()



# saveRDS(pairCounts,here::here("data","pairCounts.rds"))
#pairCounts <- readRDS(here("data/pairCounts.rds")) |> arrange(mean_resp)

Display code

pair_plot <- function(Pair,item_labels=FALSE){
  df <- d |> filter(pair_label==Pair) |> slice_head(n=1) 
  dim1=30
  x_limits <- c(-dim1, dim1)  # Set fixed x-axis limits
  y_limits <- c(-dim1, dim1)  # Set fixed y-axis limits

    pat1 <- df %>%
          mutate(pattern_1 = purrr::map(pattern_1, jsonlite::fromJSON)) %>%
          unnest(pattern_1) %>%
          mutate(y=-y, pat=item_label_1) |> select(pair_label,x,y,pat)

    pat2 <- df %>%
          mutate(pattern_2 = purrr::map(pattern_2, jsonlite::fromJSON)) %>%
          unnest(pattern_2) %>%
          mutate(y=-y, pat=item_label_2) |> select(pair_label,x,y,pat)

    pat <- rbind(pat1,pat2)
    pat |> 
    ggplot(aes(x = x, y = y #,fill=pat,col=pat
              )) +
          geom_point(alpha=2,size=.8) +
          coord_cartesian(xlim = x_limits, ylim =y_limits) +
          theme_minimal() +
          facet_wrap(~pat,ncol=2,scales = "fixed") + #axes="all"
          #theme_blank +
          theme_void() + 
          theme(strip.text = element_text(size = 7,hjust=.5),
                #panel.spacing.x=unit(-8.5, "lines"), 
                #strip.background = element_rect(colour = "black", linewidth = 2),
              strip.text.x  = if(item_labels) element_text() else element_blank(), # remove pattern labels
                panel.border = element_blank(),  # Remove borders around facets
                legend.position = "none",
                plot.background = element_rect(fill = "white"),
                panel.background = element_rect(fill = "white"),
        #axis.line.y = element_line(colour = "black", linewidth = .1)
        ) +
      xlim(x_limits[1], x_limits[2]) +  # Set x-axis limits explicitly
    ylim(y_limits[1], y_limits[2])    # Set y-axis limits explicitly
}

save_pair <- function(Pair) {
  p <- pair_plot(Pair)
  ggsave(filename = paste0(here("Stimulii/pair_images/"),Pair, ".png"), plot = p,width = 4, height = 3, dpi = 200)
}


plot_hist_pair <- function(Pair) {
  d |> filter(pair_label==Pair) |>
    ggplot(aes(x = response)) +
    geom_histogram(binwidth=1,fill = 'dodgerblue4') +
    scale_x_continuous(breaks=seq(1, 9, by = 1)) +
    coord_cartesian(xlim = c(1, 9)) +
    theme_minimal() +
   # theme_void() +
    theme(#axis.title.x=element_blank(),
          #axis.title.y=element_blank(),
          plot.title=element_text(face="plain",size=14,hjust=.5,vjust=1),
          axis.text.x=element_text(size=12))  +
    labs(x="Similarity Rating (low to high)", y= "Response Frequency",title="Response Distribution")
}

pair_hist_plot <- function(Pair,item_labels=FALSE,pair_title=""){
    df <- d |> filter(pair_label==Pair) |> slice_head(n=1) 

    pat1 <- df %>%
        mutate(pattern_1 = purrr::map(pattern_1, jsonlite::fromJSON)) %>%
        unnest(pattern_1) %>%
        mutate(y=-y, pat=item_label_1) |> select(pair_label,x,y,pat)

    pat2 <- df %>%
        mutate(pattern_2 = purrr::map(pattern_2, jsonlite::fromJSON)) %>%
        unnest(pattern_2) %>%
        mutate(y=-y, pat=item_label_2) |> select(pair_label,x,y,pat)

    hist <- plot_hist_pair(Pair)

    pat <- rbind(pat1,pat2)

    dim1=30
    max_dim <- max(max(abs(pat$x)),max(abs(pat$y)))+5

    x_limits <- c(-max_dim, max_dim)  # Set fixed x-axis limits
    y_limits <- c(-max_dim, max_dim)  # Set fixed y-axis limits

    pat_p <- pat |> 
    ggplot(aes(x = x, y = y #,fill=pat,col=pat
)) +
        geom_point(alpha=2,size=1.5) +
        coord_cartesian(xlim = x_limits, ylim =y_limits) +
        theme_minimal() +
        facet_wrap(~pat,ncol=2,scales = "fixed") + #axes="all"
        #theme_blank +
        theme(#strip.text = element_text(size = 7,hjust=.5),
            #panel.spacing.x=unit(-8.5, "lines"), 
            #strip.background = element_rect(colour = "black", linewidth = 2),
            strip.text.x  = if(item_labels) element_text() else element_blank(), # remove pattern labels
            panel.border = element_blank(),  # Remove borders around facets
            legend.position = "none",
            plot.background = element_rect(fill = "white"),
            #panel.background = element_rect(fill = "white"),
    #axis.line.y = element_line(colour = "black", linewidth = .1)
    ) +
    labs(title=pair_title) +
    theme_void() +
    theme(strip.text.x  = if(item_labels) element_text() else element_blank(),
    plot.title=element_text(face="plain",size=14,hjust=.5,vjust=1)  )+ 
      xlim(x_limits[1], x_limits[2]) +  # Set x-axis limits explicitly
    ylim(y_limits[1], y_limits[2]) #y_limits[2])    # Set y-axis limits explicitly

    pat_p + hist + plot_layout(widths = c(2.0, 1))
}

Display code

ph <- pairCounts |> 
  slice_tail(n = 1) |> 
  arrange(desc(mean_resp)) |> 
  pull(pair_label) |> 
  map(~pair_hist_plot(.x, item_labels = FALSE,pair_title="Dot Pattern Pair With Highest Similarity Rating")) |> 
  wrap_plots(ncol = 1) 



pl <- pairCounts |> 
  slice_head(n = 1) |> 
  pull(pair_label) |> 
  map(~pair_hist_plot(.x, item_labels = FALSE,pair_title="Dot Pattern Pair With Lowest Similarity Rating")) |> 
  wrap_plots(ncol = 1) 

pair_extremes <- ph / pl

# ggsave(filename=paste0(here::here(),"/assets/dot_extremes.png"),plot=pair_extremes,bg='transparent',dpi=400)


pair_extremes

Display code

#plot_annotation(title = "Dot Pattern Pair With Highest Similarity Rating")











# plot 3 pairs from the middle of pairCounts
# pairCounts |> 
#   slice(ceiling(n()/2)-1:3) |> 
#   pull(pair_label) |> 
#   map(~pair_plot(.x, item_labels = TRUE)) |> 
#   wrap_plots(ncol = 1) + 
#   plot_annotation(title = "Middle Rated Pairs")



# 
# combined_df |> 
#   pull(pair_label) |> 
#   walk(~ {
#     p <- pair_plot(.x, item_labels = TRUE)
#     ggsave(filename = paste0(here("llm/ex_images/"), .x, ".png"), plot = p, width = 4, height = 3, dpi = 200)
#   })

Highest Rated Items

Display code

# pairCounts |> 
#   slice_tail(n = 2) |> 
#   arrange(desc(mean_resp)) |> 
#   pull(pair_label) |> 
#   map(~plot_hist_pair(.x ))

ph <- pairCounts |> 
  slice_tail(n = 7) |> 
  arrange(desc(mean_resp)) |> 
  pull(pair_label) |> 
  map(~pair_hist_plot(.x, item_labels = TRUE)) |> 
  wrap_plots(ncol = 1)  + plot_annotation(title = "Dot Pattern Pair With Highest Similarity Rating")


ph

Lowest Rated Items

Display code

# pairCounts |> 
#   slice_tail(n = 2) |> 
#   arrange(desc(mean_resp)) |> 
#   pull(pair_label) |> 
#   map(~plot_hist_pair(.x ))

pl <- pairCounts |> 
  slice_head(n = 7) |> 
  pull(pair_label) |> 
  map(~pair_hist_plot(.x, item_labels = TRUE)) |> 
  wrap_plots(ncol = 1)  + plot_annotation(title = "Dot Pattern Pair With Lowest Similarity Rating")


pl

--- title: Pair Plots date: last-modified lightbox: true toc: true page-layout: full format: html: grid: sidebar-width: 200px body-width: 1000px margin-width: 200px gutter-width: 1.0rem toc-depth: 3 code-fold: true code-tools: true execute: warning: false eval: true --- ```{r} pacman::p_load(dplyr,purrr,tidyr,ggplot2, here, patchwork, conflicted,stringr, gt, knitr, kableExtra, lubridate,ggh4x,jsonlite,httr, openai) walk(c("dplyr"), conflict_prefer_all, quiet = TRUE) options(digits=2, scipen=999, dplyr.summarise.inform=FALSE) #walk(c("fun_plot"), ~ source(here::here(paste0("R/", .x, ".R")))) mc24_proto <- read.csv(here("Stimulii","mc24_prototypes.csv")) |> mutate(set=paste0(sbjCode,"_",condit)) sbj_cat <- read.csv(here("data","mc24_sbj_cat.csv")) dfiles <- list(path=list.files(here::here("data/dotSim_data"),full.names=TRUE)) d <- map_dfr(dfiles$path, ~read.csv(.x)) d <- map_dfr(dfiles$path, ~{read.csv(.x) |> mutate(sfile=tools::file_path_sans_ext(basename(.x)))}) |> select(-trial_index, -internal_node_id,-trial_type) |> mutate(set = paste(str_extract(item_label_1, "^\\d+"), str_extract(item_label_1, "[a-z]+"), sep = "_")) |> mutate(pair_label = paste0(item_label_1,"_",item_label_2)) |> relocate(sbjCode,date,set,pair_label,trial,item_label_1,item_label_2,response,rt) setCounts <- d |> pivot_longer(cols=c(item_label_1, item_label_2), names_to="item_label", values_to="item") |> group_by(set) |> summarise(n=n_distinct(sbjCode),resp=mean(response),sd=sd(response)) |> arrange(desc(n)) # length(unique(mc_proto$set)) # 304 setCounts2 <- mc24_proto |> group_by(set) |> slice_head(n=1) |> select(id,file,set) |> left_join(setCounts,by="set") |> mutate(n = ifelse(is.na(n), 0, n), .groups="drop") |> arrange(n) |> ungroup() pairCounts <- d |> group_by(pair_label,set) |> summarise(n=n(),mean_resp=mean(response),sd=sd(response)) |> arrange(mean_resp) |> # arrange(desc(n)) |> ungroup() # saveRDS(pairCounts,here::here("data","pairCounts.rds")) #pairCounts <- readRDS(here("data/pairCounts.rds")) |> arrange(mean_resp) ``` ```{r} #| fig-width: 12 #| fig-height: 9 pair_plot <- function(Pair,item_labels=FALSE){ df <- d |> filter(pair_label==Pair) |> slice_head(n=1) dim1=30 x_limits <- c(-dim1, dim1) # Set fixed x-axis limits y_limits <- c(-dim1, dim1) # Set fixed y-axis limits pat1 <- df %>% mutate(pattern_1 = purrr::map(pattern_1, jsonlite::fromJSON)) %>% unnest(pattern_1) %>% mutate(y=-y, pat=item_label_1) |> select(pair_label,x,y,pat) pat2 <- df %>% mutate(pattern_2 = purrr::map(pattern_2, jsonlite::fromJSON)) %>% unnest(pattern_2) %>% mutate(y=-y, pat=item_label_2) |> select(pair_label,x,y,pat) pat <- rbind(pat1,pat2) pat |> ggplot(aes(x = x, y = y #,fill=pat,col=pat )) + geom_point(alpha=2,size=.8) + coord_cartesian(xlim = x_limits, ylim =y_limits) + theme_minimal() + facet_wrap(~pat,ncol=2,scales = "fixed") + #axes="all" #theme_blank + theme_void() + theme(strip.text = element_text(size = 7,hjust=.5), #panel.spacing.x=unit(-8.5, "lines"), #strip.background = element_rect(colour = "black", linewidth = 2), strip.text.x = if(item_labels) element_text() else element_blank(), # remove pattern labels panel.border = element_blank(), # Remove borders around facets legend.position = "none", plot.background = element_rect(fill = "white"), panel.background = element_rect(fill = "white"), #axis.line.y = element_line(colour = "black", linewidth = .1) ) + xlim(x_limits[1], x_limits[2]) + # Set x-axis limits explicitly ylim(y_limits[1], y_limits[2]) # Set y-axis limits explicitly } save_pair <- function(Pair) { p <- pair_plot(Pair) ggsave(filename = paste0(here("Stimulii/pair_images/"),Pair, ".png"), plot = p,width = 4, height = 3, dpi = 200) } plot_hist_pair <- function(Pair) { d |> filter(pair_label==Pair) |> ggplot(aes(x = response)) + geom_histogram(binwidth=1,fill = 'dodgerblue4') + scale_x_continuous(breaks=seq(1, 9, by = 1)) + coord_cartesian(xlim = c(1, 9)) + theme_minimal() + # theme_void() + theme(#axis.title.x=element_blank(), #axis.title.y=element_blank(), plot.title=element_text(face="plain",size=14,hjust=.5,vjust=1), axis.text.x=element_text(size=12)) + labs(x="Similarity Rating (low to high)", y= "Response Frequency",title="Response Distribution") } pair_hist_plot <- function(Pair,item_labels=FALSE,pair_title=""){ df <- d |> filter(pair_label==Pair) |> slice_head(n=1) pat1 <- df %>% mutate(pattern_1 = purrr::map(pattern_1, jsonlite::fromJSON)) %>% unnest(pattern_1) %>% mutate(y=-y, pat=item_label_1) |> select(pair_label,x,y,pat) pat2 <- df %>% mutate(pattern_2 = purrr::map(pattern_2, jsonlite::fromJSON)) %>% unnest(pattern_2) %>% mutate(y=-y, pat=item_label_2) |> select(pair_label,x,y,pat) hist <- plot_hist_pair(Pair) pat <- rbind(pat1,pat2) dim1=30 max_dim <- max(max(abs(pat$x)),max(abs(pat$y)))+5 x_limits <- c(-max_dim, max_dim) # Set fixed x-axis limits y_limits <- c(-max_dim, max_dim) # Set fixed y-axis limits pat_p <- pat |> ggplot(aes(x = x, y = y #,fill=pat,col=pat )) + geom_point(alpha=2,size=1.5) + coord_cartesian(xlim = x_limits, ylim =y_limits) + theme_minimal() + facet_wrap(~pat,ncol=2,scales = "fixed") + #axes="all" #theme_blank + theme(#strip.text = element_text(size = 7,hjust=.5), #panel.spacing.x=unit(-8.5, "lines"), #strip.background = element_rect(colour = "black", linewidth = 2), strip.text.x = if(item_labels) element_text() else element_blank(), # remove pattern labels panel.border = element_blank(), # Remove borders around facets legend.position = "none", plot.background = element_rect(fill = "white"), #panel.background = element_rect(fill = "white"), #axis.line.y = element_line(colour = "black", linewidth = .1) ) + labs(title=pair_title) + theme_void() + theme(strip.text.x = if(item_labels) element_text() else element_blank(), plot.title=element_text(face="plain",size=14,hjust=.5,vjust=1) )+ xlim(x_limits[1], x_limits[2]) + # Set x-axis limits explicitly ylim(y_limits[1], y_limits[2]) #y_limits[2]) # Set y-axis limits explicitly pat_p + hist + plot_layout(widths = c(2.0, 1)) } ``` ```{r} #| fig-width: 11 #| fig-height: 9 ph <- pairCounts |> slice_tail(n = 1) |> arrange(desc(mean_resp)) |> pull(pair_label) |> map(~pair_hist_plot(.x, item_labels = FALSE,pair_title="Dot Pattern Pair With Highest Similarity Rating")) |> wrap_plots(ncol = 1) pl <- pairCounts |> slice_head(n = 1) |> pull(pair_label) |> map(~pair_hist_plot(.x, item_labels = FALSE,pair_title="Dot Pattern Pair With Lowest Similarity Rating")) |> wrap_plots(ncol = 1) pair_extremes <- ph / pl # ggsave(filename=paste0(here::here(),"/assets/dot_extremes.png"),plot=pair_extremes,bg='transparent',dpi=400) pair_extremes #plot_annotation(title = "Dot Pattern Pair With Highest Similarity Rating") # plot 3 pairs from the middle of pairCounts # pairCounts |> # slice(ceiling(n()/2)-1:3) |> # pull(pair_label) |> # map(~pair_plot(.x, item_labels = TRUE)) |> # wrap_plots(ncol = 1) + # plot_annotation(title = "Middle Rated Pairs") # # combined_df |> # pull(pair_label) |> # walk(~ { # p <- pair_plot(.x, item_labels = TRUE) # ggsave(filename = paste0(here("llm/ex_images/"), .x, ".png"), plot = p, width = 4, height = 3, dpi = 200) # }) ``` ## Highest Rated Items ```{r} #| fig-width: 12 #| fig-height: 16 # pairCounts |> # slice_tail(n = 2) |> # arrange(desc(mean_resp)) |> # pull(pair_label) |> # map(~plot_hist_pair(.x )) ph <- pairCounts |> slice_tail(n = 7) |> arrange(desc(mean_resp)) |> pull(pair_label) |> map(~pair_hist_plot(.x, item_labels = TRUE)) |> wrap_plots(ncol = 1) + plot_annotation(title = "Dot Pattern Pair With Highest Similarity Rating") ph ``` ## Lowest Rated Items ```{r} #| fig-width: 12 #| fig-height: 15 # pairCounts |> # slice_tail(n = 2) |> # arrange(desc(mean_resp)) |> # pull(pair_label) |> # map(~plot_hist_pair(.x )) pl <- pairCounts |> slice_head(n = 7) |> pull(pair_label) |> map(~pair_hist_plot(.x, item_labels = TRUE)) |> wrap_plots(ncol = 1) + plot_annotation(title = "Dot Pattern Pair With Lowest Similarity Rating") pl ```