try this: Bos$type <- tree$Type[match(Bos$spp, tree$spp)]
On Feb 8, 2008 3:17 PM, Thompson, David (MNR) <[EMAIL PROTECTED]> wrote: > Hello, > > I am unable to figure out how to code a new column in a data frame based > on an existing column that matches a column in a reference data frame, > in a relational-db fashion. I would like this to maintain a minimum set > of reference tables that may be reused over several similar datasets. > > Specifically, I have two data frames as listed below, 'Bos' and 'tree.' > For each case in 'Bos' I want to look up the matching 'spp' code in > 'tree' and insert the associated 'type' code into a new 'type' column in > 'Bos' as in: > > # add type and keep factors from reference list > Bos$type <- tree[as.character(tree$spp)==as.character(Bos$spp), > 'type'] > Bos$keep <- tree[tree$spp==Bos$spp, 'keep'] > > And I know I have seen this before but, can't remember where. I have > filtered through many of the threads referencing > 'as.numeric(levels(Bos$spp))[as.integer(Bos$spp)]' handling of factors, > any kind of sql reference I could think of, anything 'ODBC'-ish but, I > think this may be an indexing issue. I am trying to compare elements of > two different sized (list) objects (different type objects even?) and > not cycling through Bos$spp to find matches in tree$spp as expected, . . > . , has this an apply solution? > > My data frames: > > dput(head(Bos, 30)) > structure(list(oplt = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, > 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), rplt = c(3, > 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, > 0, 0, 0, 0, 0, 0, 0, 0), tree = c(32, 101, 102, 103, 104, 105, > 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, > 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129), spp = > structure(c(10L, > 10L, 12L, 14L, 10L, 10L, 14L, 10L, 15L, 10L, 9L, 3L, 10L, 10L, > 12L, 10L, 13L, 12L, 12L, 10L, 12L, 10L, 10L, 8L, 5L, 2L, 10L, > 2L, 12L, 10L), .Label = c("AW", "BD", "BE", "BF", "BW", "BY", > "CB", "HE", "IW", "MH", "MR", "OR", "PO", "SW", "SA"), class = > "factor"), > dbh = c(12.1, 10.1, 63.3, 9, 7.1, 12.1, 13.9, 6.3, 6.1, 7.9, > 5.1, 9.8, 7.1, 18.7, 44.2, 28.7, 19.8, 28, 46.6, 9, 61.6, > 3.3, 9.1, 8.7, 5.8, 3.1, 11.1, 12.3, 28, 8.6), cc = structure(c(2L, > 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, > 2L, 1L, 1L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L), .Label = > c("dom", > "sup"), class = "factor"), ba = c(114.990145103020, > 80.1184666481737, > 3147.0040469356, 63.6172512351933, 39.5919214168654, > 114.990145103020, > 151.746779150021, 31.1724531052447, 29.2246656600190, > 49.0166993776348, > 20.4282062299676, 75.429639612691, 39.5919214168654, > 274.645883758454, > 1534.38526793979, 646.924613208844, 307.907495978336, > 615.7521601036, > 1705.53923570736, 63.6172512351933, 2980.24045490142, > 8.55298599939821, > 65.0388219109427, 59.4467869875528, 26.4207942166902, > 7.54767635024948, > 96.7689077121996, 118.8228881404, 615.7521601036, 58.0880481648753 > )), .Names = c("oplt", "rplt", "tree", "spp", "dbh", "cc", > "ba"), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", > "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", > "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30" > ), class = "data.frame") > > > dput(tree) > structure(list(spp = structure(1:33, .Label = c("AB", "AS", "AW", > "BD", "BE", "BF", "BW", "BY", "CA", "CB", "CC", "CE", "DL", "DP", > "EA", "HE", "IW", "LC", "MH", "MM", "MR", "OR", "PO", "PR", "PV", > "PW", "RS", "SA", "SB", "SM", "SW", "VC", "VL"), class = "factor"), > spp.orig = structure(c(1L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 12L, > 10L, 13L, 11L, 14L, 15L, 16L, 17L, 18L, 19L, 20L, 21L, 22L, > 23L, 24L, 25L, 26L, 27L, 28L, 31L, 29L, 30L, 32L, 33L, 34L > ), .Label = c("AB", "AMEHUM", "AMESPP", "AW", "BD", "BE", > "BF", "BW", "BY", "CB", "CE", "CORALT", "CORCOR", "DIELON", > "DIRPAL", "EA", "HE", "IW", "LONCAN", "MH", "MM", "MR", "OR", > "PO", "PR", "PRUVIR", "PW", "RIBSPP", "SB", "SM", "SORAME", > "SW", "VIBACE", "VIBALN"), class = "factor"), OPL = structure(c(15L, > > 7L, 14L, 29L, 13L, 2L, 9L, 8L, 10L, 23L, 1L, 28L, 11L, 12L, > 31L, 30L, 17L, 16L, 5L, 6L, 4L, 25L, 22L, 20L, 24L, 21L, > 26L, 27L, 19L, 3L, 18L, 32L, 33L), .Label = c("HCORCAN", > "WABIBAL", "WACEPEN", "WACERUB", "WACESAS", "WACESPI", "WAMESPP", > "WBETALL", "WBETPAP", "WCORALT", "WDIELON", "WDIRPAL", "WFAGGRA", > "WFRAAME", "WFRANIG", "WLONCAN", "WOSTVIR", "WPICGLA", "WPICMAR", > "WPINRES", "WPINSTR", "WPOPTRE", "WPRUSER", "WPRUVIV", "WQUERUB", > "WRIBAME", "WSORAME", "WTHUOCC", "WTILAME", "WTSUCAN", "WULMAME", > "WVIBACE", "WVIBLAO"), class = "factor"), form = c(1.1, 1.2, > 1.1, 1.1, 1.1, 1.1, 1.1, 1.1, 1.2, 1.1, 2, 1.1, 1.2, 1.2, > 1.1, 1.1, 1.1, 1.2, 1.1, 1.2, 1.1, 1.1, 1.1, 1.1, 1.2, 1.1, > 1.2, 1.1, 1.1, 1.2, 1.1, 1.2, 1.2), Type = structure(c(2L, > 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, > 2L, 2L), .Label = c("H", "W"), class = "factor"), keep = > structure(c(1L, > 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 2L, > 2L, 1L, 2L, 1L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, > 1L, 1L), .Label = c("no", "yes"), class = "factor"), Sname = > structure(c(15L, > 6L, 14L, 29L, 13L, 1L, 8L, 7L, 9L, 23L, 10L, 28L, 11L, 12L, > 31L, 30L, 17L, 16L, 4L, 5L, 3L, 25L, 22L, 20L, 24L, 21L, > 26L, 27L, 19L, 2L, 18L, 32L, 33L), .Label = c("Abies balsamea", > "Acer pensylvanicum", "Acer rubrum", "Acer saccharum", "Acer > spicatum", > "Amelanchier", "Betula alleghaniensis", "Betula papyrifera", > "Cornus alternifolia", "Cornus canadensis", "Diervilla lonicera", > "Dirca palustris", "Fagus grandifolia", "Fraxinus americana", > "Fraxinus nigra", "Lonicera canadensis", "Ostrya virginiana", > "Picea glauca", "Picea mariana", "Pinus resinosa", "Pinus strobus", > "Populus tremuloides", "Prunus serotina", "Prunus virginiana", > "Quercus rubra", "Ribes ", "Sorbus americana", "Thuja occidentalis", > > "Tilia americana", "Tsuga canadensis", "Ulmus americana", > "Viburnum acerifolium", "Viburnum lantanoides"), class = "factor"), > Cname = structure(c(7L, 27L, 30L, 2L, 3L, 6L, 31L, 33L, 1L, > 8L, 10L, 15L, 11L, 21L, 4L, 14L, 20L, 17L, 18L, 23L, 25L, > 24L, 29L, 26L, 12L, 16L, 13L, 5L, 9L, 28L, 32L, 22L, 19L), .Label = > c("Alternate-leaved Dogwood", > "American Basswood", "American Beech", "American Elm", "American > Mountain-ash", > "Balsam Fir", "Black Ash", "Black Cherry", "Black Spruce", > "Bunchberry", "Bush Honeysuckle", "Choke Cherry", "Currant", > "Eastern Hemlock", "Eastern White Cedar", "Eastern White Pine", > "Fly Honeysuckle", "Hard Maple", "Hobblebush", "Ironwood", > "Leatherwood", "Maple-leaved Viburnum", "Mountain Maple", > "Northern Red Oak", "Red Maple", "Red Pine", "Serviceberry", > "Striped Maple", "Trembling Aspen", "White Ash", "White Birch", > "White Spruce", "Yellow Birch"), class = "factor")), .Names = > c("spp", > "spp.orig", "OPL", "form", "Type", "keep", "Sname", "Cname"), row.names > = c("1", > "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", > "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", > "26", "27", "28", "29", "30", "31", "32", "33", "34"), class = > "data.frame") > > Thanks, DaveT. > ************************************* > Silviculture Data Analyst > Ontario Forest Research Institute > Ontario Ministry of Natural Resources > [EMAIL PROTECTED] > http://ofri.mnr.gov.on.ca > > ______________________________________________ > R-help@r-project.org mailing list > https://stat.ethz.ch/mailman/listinfo/r-help > PLEASE do read the posting guide http://www.R-project.org/posting-guide.html > and provide commented, minimal, self-contained, reproducible code. > -- Jim Holtman Cincinnati, OH +1 513 646 9390 What is the problem you are trying to solve? ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.