首页 > 解决方案 > 按纬度汇总包含数字和字符串的数据框

问题描述

我希望能够按位置(纬度)计算以下数据集的平均“IntakeEnergy”。我认为我遇到的问题是数据集同时包含数字和字符,这导致函数不知道如何计算字符串的平均值。我尝试按纬度对数据进行分组,然后使用以下方法计算其他列的平均值:

blah<-profit.fall.all %>% group_by(Lat) %>% summarise_all(funs(mean))

但是,这只有在我删除所有字符串列时才会成功。我对 excel 更加熟悉,我相信一个潜在的解决方案是创建一个数据透视表,但我不确定这是否是解决这个问题的最佳方法。

Data<-dput(head(profit.fall.all,15))
structure(list(Id = structure(c(35L, 70L, 20L, 5L, 15L, 5L, 35L, 
34L, 36L, 47L, 33L, 50L, 69L, 66L, 20L), .Label = c("Barren Island Mud 1", 
"BH High 1", "BH High 2", "BH Low 1", "BH Low 2", "BH Low 3", 
"BH SAV 2", "BHH 1 C", "BHH 2 E", "BHL 1 E", "BHL 2", "BHL 3 (B)", 
"BHM 1", "BHM 1 C", "BI High 1", "BI Low 1", "BI Low 2C", "BI Low 3", 
"BI Marsh B", "BI Mud", "BIHI High B", "BIL1 (low) E", "BIL1 E", 
"BIL1E", "BIL2 E", "BIL2E", "BW Fresh 1", "BW Fresh 2", "BW High 1", 
"BW High 2", "BW High 5", "BW Low 3", "BW Money Stump", "BW Mud 1", 
"BW SAV 1", "BW SAV 2", "BWH 1 D", "BWH 2", "BWH 3", "BWH 5", 
"BWL 1", "BWL 2", "BWL 3", "BWM 1", "BWMS D", "BWS 1", "EN High 2", 
"EN High 4", "EN High 5", "EN Low 1", "EN Low 2", "EN Mud 2", 
"ENH3 A High", "ENH4 A High", "ENH5 A High", "ENL1 Low E", "ENM1 A Mud", 
"ENS1 SAV", "ENS2 SAV 2C", "ENS3 SAV 3E", "High 3C", "James Marsh", 
"MWP 27 High 1", "MWP 28 High 2", "MWP 29 Low 1", "MWP 30 Mud 1", 
"MWP 31 Low 2", "MWP 32 Mud 2", "MWP 33 Low 3", "MWP 34 Low 4", 
"MWP 35 Mud 3", "PWRC Fresh", "PWRC Fresh 1", "PWRC Fresh 1-4", 
"WP 27 HM-MARC", "WP 28 HM-MARC", "WP 30 IT MARE", "WP29 LM-MARC", 
"WP30 IT MARE"), class = "factor"), Season = structure(c(2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("", 
"Fall", "Spring", "Spring?"), class = "factor"), Refuge = structure(c(5L, 
7L, 2L, 3L, 2L, 3L, 5L, 5L, 5L, 6L, 5L, 6L, 7L, 7L, 2L), .Label = c("", 
"Barren Island", "Bishop's Head", "Bishops Head", "Blackwater", 
"Eastern Neck", "Martin", "PWRC"), class = "factor"), HType = structure(c(6L, 
4L, 5L, 4L, 3L, 4L, 6L, 5L, 6L, 3L, 4L, 4L, 4L, 5L, 5L), .Label = c("", 
"Fresh", "High", "Low", "Mud", "SAV"), class = "factor"), Long = c(-76.109109, 
-75.99733, -76.261634, -76.038959, -76.256617, -76.038959, -76.109109, 
-76.146408, -76.103627, -76.225188, -76.23491, -76.239864, -75.99354, 
-76.01407, -76.261634), Lat = c(38.441089, 37.99369, 38.336058, 
38.224469, 38.326234, 38.224469, 38.441089, 38.417947, 38.403511, 
39.04065, 38.43141, 39.026771, 37.98833, 38.01108, 38.336058), 
    Prey.Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("Melampus", "Ruppia", 
    "Scirpus", "Zannichellia"), class = "factor"), IntakeEnergy = c(112.577988769079, 
    29.6957686910562, 22.825852053573, 64.2669620925843, 1182.80971128049, 
    454.559045812661, 893.487901876808, 483.341619235618, 16853.8824849192, 
    38.6168499963705, 30.540123199744, 557.798944392019, 6326.03316402962, 
    57.7990761383585, 11.4733747438)), row.names = c(1L, 5L, 
6L, 7L, 8L, 15L, 22L, 23L, 24L, 25L, 33L, 35L, 42L, 43L, 45L), class = "data.frame")
> View(Data)

标签: rpivot-tablegroup-summaries

解决方案


最后两个选项。希望这些足够接近,您可以设法改变自己的口味。我觉得我给了你四个有效的答案,你不能完全描述你想要什么


Data %>% 
  group_by(Lat) %>% 
  mutate(avgIntakeEnergybyLat = mean(IntakeEnergy, na.rm = TRUE)) %>% 
  arrange(Id) %>%
  select(-Prey.Group) 
#> # A tibble: 15 x 8
#> # Groups:   Lat [12]
#>    Id         Season Refuge    HType  Long   Lat IntakeEnergy avgIntakeEnergyby…
#>    <fct>      <fct>  <fct>     <fct> <dbl> <dbl>        <dbl>              <dbl>
#>  1 BH Low 2   Fall   Bishop's… Low   -76.0  38.2         64.3              259. 
#>  2 BH Low 2   Fall   Bishop's… Low   -76.0  38.2        455.               259. 
#>  3 BI High 1  Fall   Barren I… High  -76.3  38.3       1183.              1183. 
#>  4 BI Mud     Fall   Barren I… Mud   -76.3  38.3         22.8               17.1
#>  5 BI Mud     Fall   Barren I… Mud   -76.3  38.3         11.5               17.1
#>  6 BW Money … Fall   Blackwat… Low   -76.2  38.4         30.5               30.5
#>  7 BW Mud 1   Fall   Blackwat… Mud   -76.1  38.4        483.               483. 
#>  8 BW SAV 1   Fall   Blackwat… SAV   -76.1  38.4        113.               503. 
#>  9 BW SAV 1   Fall   Blackwat… SAV   -76.1  38.4        893.               503. 
#> 10 BW SAV 2   Fall   Blackwat… SAV   -76.1  38.4      16854.             16854. 
#> 11 EN High 2  Fall   Eastern … High  -76.2  39.0         38.6               38.6
#> 12 EN Low 1   Fall   Eastern … Low   -76.2  39.0        558.               558. 
#> 13 MWP 30 Mu… Fall   Martin    Mud   -76.0  38.0         57.8               57.8
#> 14 MWP 33 Lo… Fall   Martin    Low   -76.0  38.0       6326.              6326. 
#> 15 MWP 34 Lo… Fall   Martin    Low   -76.0  38.0         29.7               29.7

Data %>% 
  group_by(Lat) %>% 
  mutate(avgIntakeEnergybyLat = mean(IntakeEnergy, na.rm = TRUE)) %>% 
  arrange(Id) %>%
  select(-Prey.Group, -IntakeEnergy) %>%
  distinct(Lat, .keep_all = TRUE)
#> # A tibble: 12 x 7
#> # Groups:   Lat [12]
#>    Id             Season Refuge        HType  Long   Lat avgIntakeEnergybyLat
#>    <fct>          <fct>  <fct>         <fct> <dbl> <dbl>                <dbl>
#>  1 BH Low 2       Fall   Bishop's Head Low   -76.0  38.2                259. 
#>  2 BI High 1      Fall   Barren Island High  -76.3  38.3               1183. 
#>  3 BI Mud         Fall   Barren Island Mud   -76.3  38.3                 17.1
#>  4 BW Money Stump Fall   Blackwater    Low   -76.2  38.4                 30.5
#>  5 BW Mud 1       Fall   Blackwater    Mud   -76.1  38.4                483. 
#>  6 BW SAV 1       Fall   Blackwater    SAV   -76.1  38.4                503. 
#>  7 BW SAV 2       Fall   Blackwater    SAV   -76.1  38.4              16854. 
#>  8 EN High 2      Fall   Eastern Neck  High  -76.2  39.0                 38.6
#>  9 EN Low 1       Fall   Eastern Neck  Low   -76.2  39.0                558. 
#> 10 MWP 30 Mud 1   Fall   Martin        Mud   -76.0  38.0                 57.8
#> 11 MWP 33 Low 3   Fall   Martin        Low   -76.0  38.0               6326. 
#> 12 MWP 34 Low 4   Fall   Martin        Low   -76.0  38.0                 29.7

原来OP想要添加一列并保留其他列和行......

library(dplyr)

Data %>% 
  group_by(Lat) %>% 
  mutate(avgbyLat = mean(IntakeEnergy, na.rm = TRUE)) %>% 
  arrange(Id)
#> # A tibble: 15 x 9
#> # Groups:   Lat [12]
#>    Id        Season Refuge    HType  Long   Lat Prey.Group IntakeEnergy avgbyLat
#>    <fct>     <fct>  <fct>     <fct> <dbl> <dbl> <fct>             <dbl>    <dbl>
#>  1 BH Low 2  Fall   Bishop's… Low   -76.0  38.2 Melampus           64.3    259. 
#>  2 BH Low 2  Fall   Bishop's… Low   -76.0  38.2 Melampus          455.     259. 
#>  3 BI High 1 Fall   Barren I… High  -76.3  38.3 Melampus         1183.    1183. 
#>  4 BI Mud    Fall   Barren I… Mud   -76.3  38.3 Melampus           22.8     17.1
#>  5 BI Mud    Fall   Barren I… Mud   -76.3  38.3 Scirpus            11.5     17.1
#>  6 BW Money… Fall   Blackwat… Low   -76.2  38.4 Ruppia             30.5     30.5
#>  7 BW Mud 1  Fall   Blackwat… Mud   -76.1  38.4 Ruppia            483.     483. 
#>  8 BW SAV 1  Fall   Blackwat… SAV   -76.1  38.4 Melampus          113.     503. 
#>  9 BW SAV 1  Fall   Blackwat… SAV   -76.1  38.4 Ruppia            893.     503. 
#> 10 BW SAV 2  Fall   Blackwat… SAV   -76.1  38.4 Ruppia          16854.   16854. 
#> 11 EN High 2 Fall   Eastern … High  -76.2  39.0 Ruppia             38.6     38.6
#> 12 EN Low 1  Fall   Eastern … Low   -76.2  39.0 Ruppia            558.     558. 
#> 13 MWP 30 M… Fall   Martin    Mud   -76.0  38.0 Ruppia             57.8     57.8
#> 14 MWP 33 L… Fall   Martin    Low   -76.0  38.0 Ruppia           6326.    6326. 
#> 15 MWP 34 L… Fall   Martin    Low   -76.0  38.0 Melampus           29.7     29.7

与之前的答案相反。

试试summarise_if(is.numeric, mean, na.rm = TRUE)[确定你只想要Lat]

library(dplyr)

Data %>% group_by(Lat) %>% summarise_if(is.numeric, mean, na.rm = TRUE)
#> # A tibble: 12 x 3
#>      Lat  Long IntakeEnergy
#>    <dbl> <dbl>        <dbl>
#>  1  38.0 -76.0       6326. 
#>  2  38.0 -76.0         29.7
#>  3  38.0 -76.0         57.8
#>  4  38.2 -76.0        259. 
#>  5  38.3 -76.3       1183. 
#>  6  38.3 -76.3         17.1
#>  7  38.4 -76.1      16854. 
#>  8  38.4 -76.1        483. 
#>  9  38.4 -76.2         30.5
#> 10  38.4 -76.1        503. 
#> 11  39.0 -76.2        558. 
#> 12  39.0 -76.2         38.6

数据在哪里...

Data <- structure(list(Id = structure(c(35L, 70L, 20L, 5L, 15L, 5L, 35L,
34L, 36L, 47L, 33L, 50L, 69L, 66L, 20L), .Label = c("Barren Island Mud 1",
"BH High 1", "BH High 2", "BH Low 1", "BH Low 2", "BH Low 3",
"BH SAV 2", "BHH 1 C", "BHH 2 E", "BHL 1 E", "BHL 2", "BHL 3 (B)",
"BHM 1", "BHM 1 C", "BI High 1", "BI Low 1", "BI Low 2C", "BI Low 3",
"BI Marsh B", "BI Mud", "BIHI High B", "BIL1 (low) E", "BIL1 E",
"BIL1E", "BIL2 E", "BIL2E", "BW Fresh 1", "BW Fresh 2", "BW High 1",
"BW High 2", "BW High 5", "BW Low 3", "BW Money Stump", "BW Mud 1",
"BW SAV 1", "BW SAV 2", "BWH 1 D", "BWH 2", "BWH 3", "BWH 5",
"BWL 1", "BWL 2", "BWL 3", "BWM 1", "BWMS D", "BWS 1", "EN High 2",
"EN High 4", "EN High 5", "EN Low 1", "EN Low 2", "EN Mud 2",
"ENH3 A High", "ENH4 A High", "ENH5 A High", "ENL1 Low E", "ENM1 A Mud",
"ENS1 SAV", "ENS2 SAV 2C", "ENS3 SAV 3E", "High 3C", "James Marsh",
"MWP 27 High 1", "MWP 28 High 2", "MWP 29 Low 1", "MWP 30 Mud 1",
"MWP 31 Low 2", "MWP 32 Mud 2", "MWP 33 Low 3", "MWP 34 Low 4",
"MWP 35 Mud 3", "PWRC Fresh", "PWRC Fresh 1", "PWRC Fresh 1-4",
"WP 27 HM-MARC", "WP 28 HM-MARC", "WP 30 IT MARE", "WP29 LM-MARC",
"WP30 IT MARE"), class = "factor"), Season = structure(c(2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("",
"Fall", "Spring", "Spring?"), class = "factor"), Refuge = structure(c(5L,
7L, 2L, 3L, 2L, 3L, 5L, 5L, 5L, 6L, 5L, 6L, 7L, 7L, 2L), .Label = c("",
"Barren Island", "Bishop's Head", "Bishops Head", "Blackwater",
"Eastern Neck", "Martin", "PWRC"), class = "factor"), HType = structure(c(6L,
4L, 5L, 4L, 3L, 4L, 6L, 5L, 6L, 3L, 4L, 4L, 4L, 5L, 5L), .Label = c("",
"Fresh", "High", "Low", "Mud", "SAV"), class = "factor"), Long = c(-76.109109,
-75.99733, -76.261634, -76.038959, -76.256617, -76.038959, -76.109109,
-76.146408, -76.103627, -76.225188, -76.23491, -76.239864, -75.99354,
-76.01407, -76.261634), Lat = c(38.441089, 37.99369, 38.336058,
38.224469, 38.326234, 38.224469, 38.441089, 38.417947, 38.403511,
39.04065, 38.43141, 39.026771, 37.98833, 38.01108, 38.336058),
Prey.Group = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 3L), .Label = c("Melampus", "Ruppia",
"Scirpus", "Zannichellia"), class = "factor"), IntakeEnergy = c(112.577988769079,
29.6957686910562, 22.825852053573, 64.2669620925843, 1182.80971128049,
454.559045812661, 893.487901876808, 483.341619235618, 16853.8824849192,
38.6168499963705, 30.540123199744, 557.798944392019, 6326.03316402962,
57.7990761383585, 11.4733747438)), row.names = c(1L, 5L,
6L, 7L, 8L, 15L, 22L, 23L, 24L, 25L, 33L, 35L, 42L, 43L, 45L), class = "data.frame")

glimpse(Data)
#> Rows: 15
#> Columns: 8
#> $ Id           <fct> BW SAV 1, MWP 34 Low 4, BI Mud, BH Low 2, BI High 1, BH …
#> $ Season       <fct> Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fall, Fa…
#> $ Refuge       <fct> Blackwater, Martin, Barren Island, Bishop's Head, Barren…
#> $ HType        <fct> SAV, Low, Mud, Low, High, Low, SAV, Mud, SAV, High, Low,…
#> $ Long         <dbl> -76.10911, -75.99733, -76.26163, -76.03896, -76.25662, -…
#> $ Lat          <dbl> 38.44109, 37.99369, 38.33606, 38.22447, 38.32623, 38.224…
#> $ Prey.Group   <fct> Melampus, Melampus, Melampus, Melampus, Melampus, Melamp…
#> $ IntakeEnergy <dbl> 112.57799, 29.69577, 22.82585, 64.26696, 1182.80971, 454…

reprex 包(v0.3.0)于 2020 年 5 月 7 日创建


推荐阅读