temukan string dengan karakter serupa dalam pengaturan grup di R

> dput(mydf)
# dput() output for `mydf`: a data.frame with 13 rows and three factor
# columns (pID, tID, sID). To recreate the object, assign this expression,
# e.g. `mydf <- structure(...)`.
# The factor levels list many more values than the 13 rows actually use
# (pID only uses level codes 69 and 73), and sID is NA in rows 8, 9 and 12.
structure(list(pID = structure(c(69L, 69L, 69L, 69L, 69L, 69L, 
69L, 69L, 69L, 73L, 73L, 73L, 73L), .Label = c("S001", "S002", 
"S003", "S004", "S005", "S006", "S007", "S009", "S012", "S013", 
"S014", "S015", "S016", "S017", "S020", "S021", "S022", "S025", 
"S027", "S028", "S029", "S030", "S032", "S035", "S036", "S038", 
"S039", "S040", "S041", "S042", "S043", "S044", "S045", "S047", 
"S048", "S049", "S050", "S051", "S052", "S053", "S056", "S057", 
"S058", "S059", "S060", "S061", "S062", "S063", "S064", "S065", 
"S066", "S067", "S069", "S070", "S071", "S073", "S075", "S076", 
"S077", "S078", "S079", "S080", "S081", "S082", "S083", "S084", 
"S087", "S088", "S089", "S090", "S091", "S093", "S095", "S097", 
"S099", "S100", "S101", "S103", "S104", "S105", "S106", "S107", 
"S109", "S110", "S112", "S113", "S114", "S115", "S116", "S117", 
"S118", "S119", "S121", "S123", "S124", "S125", "S127", "S128", 
"S129", "S130", "S133", "S134", "S135", "S136", "S138", "S139", 
"S141", "S142", "S143", "S144", "S145", "S146", "S149", "S150", 
"S151", "S152", "S153", "S154", "S155", "S156", "S157", "S161", 
"S163", "S164", "S166", "S168", "S170", "S171", "S172", "S176", 
"S177", "S179", "S180", "S182", "S183", "S188", "S189", "S190", 
"S191", "S192", "S195", "S197", "S200", "S201", "S202", "S204", 
"S211", "S214", "S217", "S218", "S220", "S222", "S224", "S229", 
"S231", "S234", "S235", "S238", "S246", "S250", "S251", "S254", 
"S327", "S333", "S338", "S441", "S467", "S486", "S503", "S523", 
"S532"), class = "factor"), tID = structure(c(9L, 13L, 14L, 18L, 
23L, 27L, 28L, 10L, 19L, 8L, 14L, 17L, 23L), .Label = c("", "3T1_1", 
"3T3_1", "3T3_2", "3T4_1", "3T4_2", "T", "T1", "T1_1", "T1_2", 
"T1_3", "T1_4", "T11", "T2", "T2_1", "T2_2", "T3", "T3_1", "T3_2", 
"T4", "T4_1", "T4_2", "T5", "T5_1", "T5_2", "T6", "T8", "T9"), class = "factor"), 
    sID = structure(c(25L, 25L, 25L, 25L, 25L, 25L, 25L, NA, 
    NA, 27L, 27L, NA, 27L), .Label = c("", "P1", "P10", "P11", 
    "P12", "P13", "P14", "P15", "P16", "P17", "P18", "P19", "P2", 
    "P20", "P21", "P22", "P23", "P24", "P25", "P26", "P27", "P28", 
    "P29", "P3", "P30", "P31", "P32", "P33", "P34", "P35", "P36", 
    "P37", "P38", "P39", "P4", "P40", "P41", "P42", "P43", "P44", 
    "P45", "P5", "P6", "P7", "P8", "P9"), class = "factor")), class = "data.frame", row.names = c(NA, 
-13L))

Dalam bingkai data di atas saya ingin melakukan 2 hal;

  1. Pada kolom sID isi nilai NA dengan string yang sama dengan baris lainnya pada kolom tersebut berdasarkan nilai pada kolom pID. Jadi semua S089 akan memiliki P30 di sID.
  2. Buat kolom baru: kelompokkan berdasarkan pID, lalu tandai setiap baris sebagai unik jika nilai tID sebelum _ berbeda dari baris lain dalam grup yang sama; jika tidak, tandai sebagai berulang. Jadi untuk S089, baris T1_1, T1_2, T3_1, dan T3_2 akan ditandai berulang dan baris lainnya akan ditandai unik.

Seharusnya bisa dilakukan dengan dplyr, saya kira. Terima kasih banyak!


person symo    schedule 09.07.2020    source sumber


Jawaban (1)


Dengan premis bahwa

  • di dalam setiap grup pID, sID selalu tetap sama
  • jika nilai di tID berisi garis bawah (_) maka nilai tersebut diulangi (karena inilah yang muncul dari data sampel Anda)

salah satu solusi yang mungkin menggunakan tidyr dan dplyr adalah

library(tidyr)
library(dplyr)

# Label each element of `x` as "repeated" when its prefix (the text before
# the first "_") occurs more than once in `x`, and as "unique" otherwise.
# Comparing prefixes, rather than only testing for an underscore, means a
# lone "T1_1" is "unique" while "T1" next to "T1_1" are both "repeated".
flag_repeated <- function(x) {
  prefix <- sub("_.*$", "", as.character(x))
  ifelse(prefix %in% prefix[duplicated(prefix)], "repeated", "unique")
}

# Per pID group: fill missing sID values from the neighbouring rows (down,
# then up) and flag each row by whether its tID prefix repeats in the group.
# The result stays grouped by pID; append `ungroup()` if later steps should
# not operate per group.
mydf %>%
  group_by(pID) %>%
  fill(sID, .direction = "downup") %>%
  mutate(new_col = flag_repeated(tID))

Keluaran

# A tibble: 13 x 4
# Groups:   pID [2]
#    pID   tID   sID   new_col 
#    <fct> <fct> <fct> <chr>   
#  1 S089  T1_1  P30   repeated
#  2 S089  T11   P30   unique  
#  3 S089  T2    P30   unique  
#  4 S089  T3_1  P30   repeated
#  5 S089  T5    P30   unique  
#  6 S089  T8    P30   unique  
#  7 S089  T9    P30   unique  
#  8 S089  T1_2  P30   repeated
#  9 S089  T3_2  P30   repeated
# 10 S095  T1    P32   unique  
# 11 S095  T2    P32   unique  
# 12 S095  T3    P32   unique  
# 13 S095  T5    P32   unique
person Ric S    schedule 09.07.2020
comment
Terima kasih atas jawabannya! Ingin tahu apakah mungkin menambahkan angka ke berapa kali setiap tID diulang? - person symo; 09.07.2020
comment
Tentu saja mungkin, tetapi saya yakin ini harus diposting sebagai pertanyaan yang berbeda karena dapat dicari dan bermanfaat bagi pengguna di masa mendatang juga :) - person Ric S; 09.07.2020