On Thu, Feb 12, 2015 at 7:12 PM, arnaud gaboury <arnaud.gabo...@gmail.com> wrote: > On Thu, Feb 12, 2015 at 3:40 PM, arnaud gaboury > <arnaud.gabo...@gmail.com> wrote: >> I have two df (and dt): >> >> df1 >> structure(list(name = c("poisonivy", "poisonivy", "poisonivy", >> "poisonivy", "poisonivy", "poisonivy", "poisonivy", "poisonivy", >> "cruzecontrol", "agreenmamba", "agreenmamba", "vairis", "vairis", >> "vairis", "vairis", "vairis", "vairis", "xaeth"), text = c("ok", >> "need items ?", "i didn't submit pass codes for a long now", >> "ok", "<@U03AEKYL4>: what app are you talking about ?", "some testing >> with my irc client", >> "ha ha sorry", "for me there is no such question", "Lol.", >> "<@U03AEKWTL|agreenmamba> uploaded a file: >> <https://enlightened.slack.com/files/agreenmamba/F03KGRF3W/screenshot_2015-02-09-14-31-15.png|regarding: >> should I stay or should I go?>", >> "<@U032FHV3S> <http://youtu.be/oGIFublvDes>", "ok, see you around", >> "yeah, I had a procrastination rush so I started to decode a little", >> "<http://ingress.com/intel|ingress.com/intel> when you submit passcodes", >> "intel", "what is the cooldown time or how does it work...;", >> "anybody knows how does \"Passcode circuitry too hot. 
Wait for cool >> down to enter another passcode.\" works?", >> "and people told that agent their geocities experience would never >> amount to anything (the convo yesterday) " >> ), ts = c("1423594336.000138", "1423594311.000136", "1423594294.000135", >> "1423594258.000133", "1423594244.000131", "1423497058.000127", >> "1423497041.000126", "1423478555.000123", "1423494427.000125", >> "1423492370.000124", "1423478364.000121", "1423594358.000139", >> "1423594329.000137", "1423594264.000134", "1423594251.000132", >> "1423592204.000130", "1423592174.000129", "1423150354.000112" >> )), .Names = c("name", "text", "ts"), class = c("data.table", >> "data.frame"), row.names = c(NA, -18L)) >> >> df2 >> structure(list(id = c("U03KH8Z52", "U02AF1DTJ", "U02AF0ZT8", >> "U03AEKWTL", "U02BCJH0G", "U033YA1MS", "U029QMCRR", "U03H139M5", >> "U02AET1D0", "U02A6U41Z", "U02B5T4CX", "U02B2QU4R", "U03F0LQ5X", >> "U03JNFKLY", "U02ASMBMQ", "U029QLQC7", "U03AEMBQU", "U02B4D3Q1", >> "U02AGDC14", "U029A467C", "U02A7NFG6", "U02AESPPL", "U02AQANK7", >> "U03ADJDFK", "U03EYR0KB", "U02AW7Q5Q", "U02AE8RKD", "U02FT84BS", >> "U02B25M3B", "U03EZDQT7", "U02AECKFF", "U03H2691M", "U02DWTJ5V", >> "U02AFTAHH", "U029QQEPM", "U03C51Z42", "U02CAK2CV", "U03AK21DP", >> "U03FFN8ED", "U02B23V03", "U029T2143", "U02C1LEEX", "U03AF2QH2", >> "U03E0GN0S", "U03AG20R9", "U02AES8S2", "U02AG64S7", "U02B5A0R7", >> "U02AS4SLR", "U03C2SG0R", "U03AV7CCW", "U032XPFDU", "U03AUKSSV", >> "U02C2A61Y", "U02AESHJQ", "U02BLSKHU", "U02E34WM6", "U03AK6P26", >> "U02E6ADRZ", "U03FCDQ50", "U03EW1CC5", "U02BL0DBD", "U02FHQZ6D", >> "U02B47T63", "U03H2TTQP", "U03AVP71V", "U03JLV38V", "U02E39HAY", >> "U02AE5281", "U032FHV3S", "U03AL2096", "U02ARUG6M", "U02AECRSP", >> "U02B42XG4", "U03AFQZNS", "U02AE7H41", "U03G9UNTG", "U02GEQ0E6", >> "U02AGLE5A", "U02BQTRC9", "U03H0J6GS", "U02B3D27F", "U02AEKTHV", >> "U02C52YN3", "U02E33MUW", "U03AKUT85", "U03B53EHG", "U02FBN38P", >> "U03AH3E5W", "U02B5PLE0", "U02AS4RCK", "U03ANE1GZ", "U02E8LZQB", >> 
"U03EPGJ98", "U02E3N220", "U03AEKYL4", "U02AE7HT1", "U02C1RR3G", >> "U03JH408J", "U03KL0FKN", "U02B44R92", "U03EURWGX"), name = c("10k_affair", >> "1upwuzhere", "4xcss", "agreenmamba", "ait109", "arly69", "azkop13", >> "barcik75", "bigolnob", "blackrose", "blink619", "bobaloo23", >> "bodger", "bomb", "bootswithdefer", "brandizzle", "bregalad", >> "camon", "celticrain", "ch3mical", "checksum", "cocothunder", >> "cruxicon", "cruzecontrol", "crystalskunk", "cscheetah", "dabcelin", >> "deelicious", "delthanar", "drkaosdk", "droidenl-joe", "dukeceph", >> "fillerbunny", "flickohmsford", "flyingg0d", "garaxiel", "goby9", >> "gymbal", "hideandseek", "hobojr", "ijackportals", "invalidcharactr", >> "itso9", "j0shs", "jarvis", "jc0mm5", "jencyberchic", "jimbobradyson", >> "joespr0cket", "jostrander", "jueliet", "karlashi", "khan99", >> "kingkonn0r", "krispycridder", "kritickalmass", "lawgiver", "maxcorbett", >> "memory556", "meta000x", "minkovsky", "mistylady", "mstephans", >> "mstrinity", "nocarryr", "ollietronic", "philistine11", "pickledpickles", >> "piercingsbykris", "poisonivy", "raugmor", "remarks999", "rheds77", >> "rhinz", "rigiritter", "robbie0017", "rohdef", "ryoziya", "s4n1ty", >> "sacredcow133", "samwill", "sgtlemonpepper", "sivan", "spline9", >> "starwolf", "stueliueli", "sweetiris", "swift2plunder", "swissphoenix", >> "synyck", "test", "therug", "tinja551", "trulyjuan", "twinster", >> "vairis", "vinylz3ro", "watervirus", "xaeth", "yagamiyukari", >> "zafo", "zexium")), .Names = c("id", "name"), class = c("data.table", >> "data.frame"), row.names = c(NA, -102L)) >> >> I need to replace this regex pattern in df1 : >> (?<=<@)[^|]{9}(?=>|) by its corresponding name from df2. 
>> >> E.g.: if <@U03KH8Z52> is found in df1, then I want to replace it by >> the "name" which corresponds to this id in df2, in this case >> 10k_affair >> >> I know how to replace an expression with gsub: >> gsub('(?<=<@)[^|]{9}(?=>|)', 'toto', df1, perl = T) >> but I have no idea how to replace it with a value from another df. >> >> Thank you for hints > > I am gathering some pieces of the puzzle. > >> regmatches(df1$text,regexpr('(?<=<@)[^|]{9}(?=>|)',df1$text, perl = T)) > [1] "U032FHV3S" "U03AEKWTL" "U03AEKYL4" > The above command extracts the needed pattern > > df2[grep("U032FHV3S",df2$id),][[2]] > [1] "poisonivy" > The above command returns the name in the same row as the id. I need > more than one name (in my case, I need 3) > > Shall I now write a loop and get a list of the names I need? Pseudo > code would be something like: > > for i %in% regmatches(df1$text,regexpr('(?<=<@)[^|]{9}(?=>|)',df1$text, > perl = T)) > df2[grep("i",df2$id),][[2]] > > > Thank you for any hint about how I shall proceed. >
Better approach than a loop: > extrac <- regmatches(df1$text,regexpr('(?<=<@)[^|]{9}(?=>|)',df1$text, perl = > T)) > extrac [1] "U032FHV3S" "U03AEKWTL" "U03AEKYL4" > df2[df2$id %in% extrac, ] id name 1: U03AEKWTL agreenmamba 2: U032FHV3S poisonivy 3: U03AEKYL4 vairis ______________________________________________ R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.