Hello,
It is recommended to post data using dput(). Something like
dput(head(DF, 20)) # or 30
Then, paste the output of this command in a post.
Untested, but I think it should work:
# Create a logical index into 'example2'
ix <- example2$V1 %in% setdiff(example2$V1,example1$V1)
example2[ix, ] # which rows does it index?
You could also save the results of setdiff() and then see which of
example2$V1 are %in% it. If the data.frame has many rows, this way could
save some memory. (setdiff() gives only 2 character values, versus 2
TRUE plus n-2 FALSE).
Hope this helps,
Rui Barradas
Em 25-06-2012 17:37, nathalie escreveu:
hi,
I have 2 files example 1 and example 2 and would like to know what is in
example2 and not in example1 (attached)
V1 contains values, which may be duplicated, that I am using as
identifiers
I used setdiff(example2$V1,example1$V1) to find the identifiers which
are specific to example2:
[1] "rs2276598" "rs17253672"
I am looking for a way to get an output with all columns (V1 to V14)
for these 2 identifiers....
thanks for any suggestions
format example1
V1 V2 V3 V4 V5 V6
1 rs4685 2:198257795 C ENSG00000115524 ENST00000424674 Transcript
2 rs4685 2:198257795 C ENSG00000115524 ENST00000335508 Transcript
3 rs788018 2:198265526 G ENSG00000115524 ENST00000335508 Transcript
4 rs788023 2:198283305 C ENSG00000115524 ENST00000335508 Transcript
5 rs41284843 2:25536827 A ENSG00000119772 ENST00000406659 Transcript
6 rs41284843 2:25536827 A ENSG00000119772 ENST00000321117 Transcript
7 rs41284843 2:25536827 A ENSG00000119772 ENST00000264709 Transcript
8 rs41284843 2:25536827 A ENSG00000119772 ENST00000380756 Transcript
9 rs3729680 3:178927410 G ENSG00000121879 ENST00000263967 Transcript
10 rs61744960 4:106156163 A ENSG00000168769 ENST00000305737 Transcript
11 rs61744960 4:106156163 A ENSG00000168769 ENST00000413648 Transcript
12 rs61744960 4:106156163 A ENSG00000168769 ENST00000540549 Transcript
13 rs61744960 4:106156163 A ENSG00000168769 ENST00000545826 Transcript
14 rs61744960 4:106156163 A ENSG00000168769 ENST00000380013 Transcript
15 rs61744960 4:106156163 A ENSG00000168769 ENST00000535110 Transcript
16 rs61744960 4:106156163 A ENSG00000168769 ENST00000394764 Transcript
17 rs61744960 4:106156163 A ENSG00000168769 ENST00000513237 Transcript
18 rs61744960 4:106156163 A ENSG00000168769 ENST00000265149 Transcript
19 rs2454206 4:106196951 G ENSG00000168769 ENST00000540549 Transcript
20 rs2454206 4:106196951 G ENSG00000168769 ENST00000513237 Transcript
V7 V8 V9 V10 V11 V12 V13
1 SYNONYMOUS_CODING 704 705 235 V gtA/gtG rs4685
2 SYNONYMOUS_CODING 3749 3657 1219 V gtA/gtG rs4685
3 SYNONYMOUS_CODING 2723 2631 877 G ggT/ggC rs788018
4 SYNONYMOUS_CODING 515 423 141 K aaA/aaG rs788023
5 SYNONYMOUS_CODING 365 27 9 P ccC/ccT
rs41284843
6 SYNONYMOUS_CODING 264 27 9 P ccC/ccT
rs41284843
7 SYNONYMOUS_CODING 365 27 9 P ccC/ccT
rs41284843
8 NMD_TRANSCRIPT,SYNONYMOUS_CODING 264 27 9 P ccC/ccT
rs41284843
9 NON_SYNONYMOUS_CODING 1330 1173 391 I/M atA/atG
rs3729680
10 NON_SYNONYMOUS_CODING 1468 1064 355 G/D gGt/gAt
rs61744960
11 NON_SYNONYMOUS_CODING 1204 1064 355 G/D gGt/gAt
rs61744960
12 NON_SYNONYMOUS_CODING 1924 1064 355 G/D gGt/gAt
rs61744960
13 NON_SYNONYMOUS_CODING 1924 1064 355 G/D gGt/gAt
rs61744960
14 NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt
rs61744960
15 NON_SYNONYMOUS_CODING 1167 1064 355 G/D gGt/gAt
rs61744960
16 NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt
rs61744960
17 NON_SYNONYMOUS_CODING 1924 1127 376 G/D gGt/gAt
rs61744960
18 NMD_TRANSCRIPT,NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt
rs61744960
19 NON_SYNONYMOUS_CODING 6144 5284 1762 I/V Ata/Gta
rs2454206
20 NON_SYNONYMOUS_CODING 6144 5347 1783 I/V Ata/Gta
rs2454206
V14
1 ENSP=ENSP00000409435;HGNC=SF3B1
2 ENSP=ENSP00000335321;HGNC=SF3B1
3 ENSP=ENSP00000335321;HGNC=SF3B1
4 ENSP=ENSP00000335321;HGNC=SF3B1
5 ENSP=ENSP00000384852;HGNC=DNMT3A
6 ENSP=ENSP00000324375;HGNC=DNMT3A
7 ENSP=ENSP00000264709;HGNC=DNMT3A
8 ENSP=ENSP00000370132;HGNC=DNMT3A
9
ENSP=ENSP00000263967;PolyPhen=benign(0.019);SIFT=tolerated(0.13);HGNC=PIK3CA
10
ENSP=ENSP00000306705;PolyPhen=probably_damaging(0.983);SIFT=deleterious(0.01);HGNC=TET2
11
ENSP=ENSP00000391448;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2
12
ENSP=ENSP00000442788;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2
13
ENSP=ENSP00000442867;PolyPhen=probably_damaging(0.952);SIFT=deleterious(0.01);HGNC=TET2
14
ENSP=ENSP00000369351;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2
15
ENSP=ENSP00000438851;PolyPhen=probably_damaging(0.998);SIFT=deleterious(0.01);HGNC=TET2
16
ENSP=ENSP00000378245;PolyPhen=probably_damaging(0.983);SIFT=deleterious(0.01);HGNC=TET2
17
ENSP=ENSP00000425443;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2
18
ENSP=ENSP00000265149;PolyPhen=probably_damaging(0.952);SIFT=deleterious(0.01);HGNC=TET2
19
ENSP=ENSP00000442788;PolyPhen=benign(0.029);SIFT=tolerated(0.15);HGNC=TET2
20
ENSP=ENSP00000425443;PolyPhen=benign(0.029);SIFT=tolerated(0.15);HGNC=TET2
> example2
V1 V2 V3 V4 V5 V6
1 rs4685 2:198257795 C ENSG00000115524 ENST00000424674 Transcript
2 rs4685 2:198257795 C ENSG00000115524 ENST00000335508 Transcript
3 rs788018 2:198265526 G ENSG00000115524 ENST00000335508 Transcript
4 rs788023 2:198283305 C ENSG00000115524 ENST00000335508 Transcript
5 rs2276598 2:25469502 T ENSG00000119772 ENST00000321117 Transcript
6 rs2276598 2:25469502 T ENSG00000119772 ENST00000380756 Transcript
7 rs2276598 2:25469502 T ENSG00000119772 ENST00000402667 Transcript
8 rs2276598 2:25469502 T ENSG00000119772 ENST00000380746 Transcript
9 rs2276598 2:25469502 T ENSG00000119772 ENST00000264709 Transcript
10 rs3729680 3:178927410 G ENSG00000121879 ENST00000263967 Transcript
11 rs61744960 4:106156163 A ENSG00000168769 ENST00000305737 Transcript
12 rs61744960 4:106156163 A ENSG00000168769 ENST00000413648 Transcript
13 rs61744960 4:106156163 A ENSG00000168769 ENST00000540549 Transcript
14 rs61744960 4:106156163 A ENSG00000168769 ENST00000545826 Transcript
15 rs61744960 4:106156163 A ENSG00000168769 ENST00000380013 Transcript
16 rs61744960 4:106156163 A ENSG00000168769 ENST00000535110 Transcript
17 rs61744960 4:106156163 A ENSG00000168769 ENST00000394764 Transcript
18 rs61744960 4:106156163 A ENSG00000168769 ENST00000513237 Transcript
19 rs61744960 4:106156163 A ENSG00000168769 ENST00000265149 Transcript
20 rs17253672 4:106156187 T ENSG00000168769 ENST00000305737 Transcript
V7 V8 V9 V10 V11 V12 V13
1 SYNONYMOUS_CODING 704 705 235 V gtA/gtG rs4685
2 SYNONYMOUS_CODING 3749 3657 1219 V gtA/gtG rs4685
3 SYNONYMOUS_CODING 2723 2631 877 G ggT/ggC rs788018
4 SYNONYMOUS_CODING 515 423 141 K aaA/aaG rs788023
5 SYNONYMOUS_CODING 1503 1266 422 L ctG/ctA
rs2276598
6 NMD_TRANSCRIPT,SYNONYMOUS_CODING 1503 1266 422 L ctG/ctA
rs2276598
7 SYNONYMOUS_CODING 745 597 199 L ctG/ctA
rs2276598
8 SYNONYMOUS_CODING 813 699 233 L ctG/ctA
rs2276598
9 SYNONYMOUS_CODING 1604 1266 422 L ctG/ctA
rs2276598
10 NON_SYNONYMOUS_CODING 1330 1173 391 I/M atA/atG
rs3729680
11 NON_SYNONYMOUS_CODING 1468 1064 355 G/D gGt/gAt
rs61744960
12 NON_SYNONYMOUS_CODING 1204 1064 355 G/D gGt/gAt
rs61744960
13 NON_SYNONYMOUS_CODING 1924 1064 355 G/D gGt/gAt
rs61744960
14 NON_SYNONYMOUS_CODING 1924 1064 355 G/D gGt/gAt
rs61744960
15 NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt
rs61744960
16 NON_SYNONYMOUS_CODING 1167 1064 355 G/D gGt/gAt
rs61744960
17 NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt
rs61744960
18 NON_SYNONYMOUS_CODING 1924 1127 376 G/D gGt/gAt
rs61744960
19 NMD_TRANSCRIPT,NON_SYNONYMOUS_CODING 1450 1064 355 G/D gGt/gAt
rs61744960
20 NON_SYNONYMOUS_CODING 1492 1088 363 P/L cCt/cTt
rs17253672
V14
1 ENSP=ENSP00000409435;HGNC=SF3B1
2 ENSP=ENSP00000335321;HGNC=SF3B1
3 ENSP=ENSP00000335321;HGNC=SF3B1
4 ENSP=ENSP00000335321;HGNC=SF3B1
5 ENSP=ENSP00000324375;HGNC=DNMT3A
6 ENSP=ENSP00000370132;HGNC=DNMT3A
7 ENSP=ENSP00000384237;HGNC=DNMT3A
8 ENSP=ENSP00000370122;HGNC=DNMT3A
9 ENSP=ENSP00000264709;HGNC=DNMT3A
10
ENSP=ENSP00000263967;PolyPhen=benign(0.019);SIFT=tolerated(0.13);HGNC=PIK3CA
11
ENSP=ENSP00000306705;PolyPhen=probably_damaging(0.983);SIFT=deleterious(0.01);HGNC=TET2
12
ENSP=ENSP00000391448;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2
13
ENSP=ENSP00000442788;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2
14
ENSP=ENSP00000442867;PolyPhen=probably_damaging(0.952);SIFT=deleterious(0.01);HGNC=TET2
15
ENSP=ENSP00000369351;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2
16
ENSP=ENSP00000438851;PolyPhen=probably_damaging(0.998);SIFT=deleterious(0.01);HGNC=TET2
17
ENSP=ENSP00000378245;PolyPhen=probably_damaging(0.983);SIFT=deleterious(0.01);HGNC=TET2
18
ENSP=ENSP00000425443;PolyPhen=possibly_damaging(0.825);SIFT=deleterious(0);HGNC=TET2
19
ENSP=ENSP00000265149;PolyPhen=probably_damaging(0.952);SIFT=deleterious(0.01);HGNC=TET2
20
ENSP=ENSP00000306705;PolyPhen=possibly_damaging(0.602);SIFT=deleterious(0);HGNC=TET2
______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.
______________________________________________
R-help@r-project.org mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.