r - 在 R 中按组统计字符串模式的出现次数
问题描述
我正在尝试使用 R 获取数据框中某个字符串的分组计数,但到目前为止还没有找到解决方案。这是我尝试使用的一些示例数据和代码,以便您了解我要完成的工作,以及下面的进一步说明:
# Sample data for the question: a 100-row tibble of Simpsons episodes
# (seasons 1-5), built with structure(). Columns: season (integer),
# episode_title (character), imdb_votes (integer) and
# us_viewers_in_millions (double).
simpson <- structure(list(season = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L,
4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L,
5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L), episode_title = c("Simpsons Roasting on an Open Fire",
"Bart the Genius", "Homer's Odyssey", "There's No Disgrace Like Home",
"Bart the General", "Moaning Lisa", "The Call of the Simpsons",
"The Telltale Head", "Life on the Fast Lane", "Homer's Night Out",
"The Crepes of Wrath", "Krusty Gets Busted", "Some Enchanted Evening",
"Bart Gets an \"F\"", "Simpson and Delilah", "Treehouse of Horror",
"Two Cars in Every Garage and Three Eyes on Every Fish", "Dancin' Homer",
"Dead Putting Society", "Bart vs. Thanksgiving", "Bart the Daredevil",
"Itchy & Scratchy & Marge", "Bart Gets Hit by a Car", "One Fish, Two Fish, Blowfish, Blue Fish",
"The Way We Was", "Homer vs. Lisa and the 8th Commandment", "Principal Charming",
"Oh Brother, Where Art Thou?", "Bart's Dog Gets an \"F\"", "Old Money",
"Brush with Greatness", "Lisa's Substitute", "The War of the Simpsons",
"Three Men and a Comic Book", "Blood Feud", "Stark Raving Dad",
"Mr. Lisa Goes to Washington", "When Flanders Failed", "Bart the Murderer",
"Homer Defined", "Like Father, Like Clown", "Treehouse of Horror II",
"Lisa's Pony", "Saturdays of Thunder", "Flaming Moe's", "Burns Verkaufen der Kraftwerk",
"I Married Marge", "Radio Bart", "Lisa the Greek", "Homer Alone",
"Bart the Lover", "Homer at the Bat", "Separate Vocations", "Dog of Death",
"Colonel Homer", "Black Widower", "The Otto Show", "Bart's Friend Falls in Love",
"Brother, Can You Spare Two Dimes?", "Kamp Krusty", "A Streetcar Named Marge",
"Homer the Heretic", "Lisa the Beauty Queen", "Treehouse of Horror III",
"Itchy & Scratchy: The Movie", "Marge Gets a Job", "New Kid on the Block",
"Mr. Plow", "Lisa's First Word", "Homer's Triple Bypass", "Marge vs. the Monorail",
"Selma's Choice", "Brother from the Same Planet", "I Love Lisa",
"Duffless", "Last Exit to Springfield", "So It's Come to This: A Simpsons Clip Show",
"The Front", "Whacking Day", "Marge in Chains", "Krusty Gets Kancelled",
"Homer's Barbershop Quartet", "Cape Feare", "Homer Goes to College",
"Rosebud", "Treehouse of Horror IV", "Marge on the Lam", "Bart's Inner Child",
"Boy-Scoutz 'n the Hood", "The Last Temptation of Homer", "$pringfield (or, How I Learned to Stop Worrying and Love Legalized Gambling)",
"Homer the Vigilante", "Bart Gets Famous", "Homer and Apu", "Lisa vs. Malibu Stacy",
"Deep Space Homer", "Homer Loves Flanders", "Bart Gets an Elephant",
"Burns' Heir", "Sweet Seymour Skinner's Baadasssss Song"), imdb_votes = c(3734L,
1973L, 1709L, 1701L, 1732L, 1674L, 1638L, 1580L, 1578L, 1511L,
1539L, 1716L, 1567L, 1638L, 1588L, 1786L, 1457L, 1381L, 1366L,
1324L, 1522L, 1402L, 1340L, 1687L, 1392L, 1329L, 1241L, 1413L,
1264L, 1243L, 1257L, 1684L, 1246L, 1379L, 1223L, 1798L, 1274L,
1302L, 1446L, 1452L, 1262L, 1369L, 1243L, 1194L, 1618L, 1291L,
1213L, 1365L, 1179L, 1176L, 1272L, 1637L, 1201L, 1165L, 1233L,
1233L, 1176L, 1160L, 1227L, 1414L, 1339L, 1747L, 1194L, 1394L,
1293L, 1163L, 1240L, 1595L, 1350L, 1445L, 2028L, 1153L, 1176L,
1268L, 1209L, 1827L, 1105L, 1122L, 1234L, 1080L, 1269L, 1416L,
2010L, 1476L, 1479L, 1437L, 1132L, 1085L, 1270L, 1308L, 1274L,
1202L, 1123L, 1171L, 1187L, 1505L, 1191L, 1116L, 1143L, 1118L
), us_viewers_in_millions = c(26.7, 24.5, 27.5, 20.2, 27.1, 27.4,
27.6, 28, 33.5, 30.3, 31.2, 30.4, 27.1, 33.6, 29.9, 27.4, 26.1,
26.1, 25.4, 25.9, 26.2, 22.2, 24.8, 24.2, 26.8, 26.2, 23.9, 26.8,
23.9, 21.2, 20.6, 17.7, 19.7, 21, 17.3, 22.9, 20.2, 22.8, 20.8,
20.6, 20.2, 20, 23, 24.7, 23.9, 21.1, 21.9, 24.2, 23.2, 23.7,
20.5, 24.6, 23.7, 23.4, 25.5, 17.3, 17.5, 19.5, 17.2, 21.8, 18.3,
19.3, 19, 25.1, 20.1, 22.9, 23.1, 24, 28.6, 23.6, 23, 24.5, 23.8,
25.2, 25.7, 22.4, 25.5, 20.1, 20, 17.3, 19.4, 19.9, 20, 18.1,
19.5, 24, 21.7, 18.7, 20.1, 20.6, 17.9, 20.1, 20, 21.8, 20.5,
18.2, 18, 17, 14.7, 19.7)), .Names = c("season", "episode_title",
"imdb_votes", "us_viewers_in_millions"), row.names = c(NA, -100L
), class = c("tbl_df", "tbl", "data.frame"))
# Attempt from the question: group the episodes by season, then count how often
# "Homer" appears in the episode titles.
# NOTE(review): the pipe hands the grouped data frame itself to str_count() as
# its first argument, so episode_title is not evaluated as a column of the
# data here; the call would need to sit inside mutate() or summarize().
char_counts <- simpson %>%
group_by(season) %>%
str_count(episode_title, "Homer")
所以我首先按季(season)对数据进行分组,然后试图计算在给定一季的所有剧集标题中“Homer”一词出现的总次数。
任何关于我哪里出错的建议将不胜感激。
祝好,柯蒂斯
解决方案
要为每一行添加一个新变量,您需要使用 `mutate` 函数。除非您想按组汇总,否则不需要 `group_by`:
# Add a per-episode column holding the number of times "Homer" occurs in the
# episode title; every original row is kept.
simpson %>%
  mutate(
    homer_count = str_count(string = episode_title, pattern = "Homer")
  )
# A tibble: 100 x 5
season episode_title imdb_votes us_viewers_in_millions homer_count
<int> <chr> <int> <dbl> <int>
1 1 Simpsons Roasting on an Open Fire 3734 26.7 0
2 1 Bart the Genius 1973 24.5 0
3 1 Homer's Odyssey 1709 27.5 1
4 1 There's No Disgrace Like Home 1701 20.2 0
5 1 Bart the General 1732 27.1 0
6 1 Moaning Lisa 1674 27.4 0
7 1 The Call of the Simpsons 1638 27.6 0
8 1 The Telltale Head 1580 28 0
9 1 Life on the Fast Lane 1578 33.5 0
10 1 Homer's Night Out 1511 30.3 1
# ... with 90 more rows
如果您想统计每一季中 `Homer` 出现了多少次,请先使用 `group_by`,然后使用 `summarize` 生成一个新变量,每组一行:
# Collapse to one row per season: sum the per-title counts of "Homer" over all
# episodes that belong to the season.
simpson %>%
  group_by(season) %>%
  summarize(
    homer_count = sum(
      str_count(string = episode_title, pattern = "Homer")
    )
  )
# A tibble: 5 x 2
season homer_count
<int> <int>
1 1 2
2 2 2
3 3 4
4 4 2
5 5 7
推荐阅读
- r - 当数据集中没有符合输入的观察值时,对闪亮的 selectizeInput 进行过滤并显示空白图
- sql-server - @Transaction(propagation = Propagation.REQUIRES_NEW) 在 MS SQL 存储过程中不可见
- javascript - 当我尝试读取文件时,从 API 中提取文件并通过管道传输会导致文件未找到错误
- java - 获取一个带有随机数的数组来比较数字
- javascript - 我可以使用 Chrome 扩展程序更改“无互联网连接”页面吗?
- python - 为什么 dask 不并行化这个工作流程?
- angular - 如何在 Angular 组件上动态加载资产图像?
- reactjs - React Router v4 - 无法 console.log {match.params.id} 或另存为变量
- javascript - 如何使用函数对 JavaScript 对象中的属性施加条件
- c# - 使用 vb.net 水晶报表打印时出错?