awk - awk:根据所选列中的值过滤日志文件
问题描述
使用这种以多列格式排列的输入数据:
#dlgfn #in cluster #LE #rmsd #ats #tors #h_ats #lig_eff
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_12_lig_cne_177,1, -2.3600, 238.8992, 80, 29, 2, -0.0303
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_08_lig_cne_177,1, -2.0900, 238.8239, 80, 29, 2, -0.0268
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_08_lig_cne_177,1, -1.9600, 239.3082, 80, 29, 2, -0.0251
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_12_lig_cne_177,1, -1.9300, 238.6729, 80, 29, 2, -0.0247
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_12_lig_cne_177,2, -1.5500, 239.4734, 80, 29, 2, -0.0199
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_12_lig_cne_177,1, -1.0900, 240.2418, 80, 29, 2, -0.0140
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_08_lig_cne_177,1, -0.7700, 212.9619, 80, 29, 2, -0.0099
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_09_lig_cne_177,3, -0.7200, 218.4045, 80, 29, 2, -0.0092
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_09_lig_cne_177,2, -0.6800, 201.6466, 80, 29, 2, -0.0087
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_10_lig_cne_177,2, -0.6300, 240.4892, 80, 29, 2, -0.0081
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_10_lig_cne_177,1, -0.5400, 240.1765, 80, 29, 2, -0.0069
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_09_lig_cne_177,1, -0.5000, 215.6699, 80, 29, 2, -0.0064
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_07_lig_cne_177,1, -0.3800, 199.5818, 80, 29, 2, -0.0049
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_10_lig_cne_177,2, -0.3400, 240.5046, 80, 29, 2, -0.0044
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_09_lig_cne_177,1, -0.3300, 218.2226, 80, 29, 2, -0.0042
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_12_lig_cne_177,1, -0.2700, 231.5438, 80, 29, 2, -0.0035
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_05_lig_cne_177,1, -0.0600, 200.8627, 80, 29, 2, -0.0008
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_04_lig_cne_177,2, -0.0300, 220.3135, 80, 29, 2, -0.0004
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_09_lig_cne_177,1, -0.0000, 221.4531, 80, 29, 2, -0.0000
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_08_lig_cne_177,1, 0.0300, 231.2099, 80, 29, 2, 0.0004
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_12_lig_cne_177,1, 0.0400, 240.5225, 80, 29, 2, 0.0005
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_12_lig_cne_177,1, 0.0500, 232.5498, 80, 29, 2, 0.0006
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_12_lig_cne_177,1, 0.0600, 229.8094, 80, 29, 2, 0.0008
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_11_lig_cne_177,2, 0.2300, 218.2757, 80, 29, 2, 0.0029
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_09_lig_cne_177,3, 0.2700, 220.3585, 80, 29, 2, 0.0035
/Users/gleb/Desktop/scripts/analys_clusters/sub_folders_to_analyse/7000_cne_lig177.AllBoxes/7000_09_lig_cne_177,1, 0.3700, 219.5366, 80, 29, 2, 0.0047
我正在使用以下 AWK 表达式扫描日志并打印行号 + 第 2、3 和 5 列,同时跳过第一(标题)行:
# Skip the header line (NR > 1), then append a running row number plus
# fields 2, 3 and 5 of every remaining row to the per-cluster CSV.
# The whole path is quoted so that values containing spaces or glob
# characters in dir_name / c / c_mod cannot be split or expanded.
gawk -F'^[^,]*,|, ' -v OFS=', ' 'NR > 1 { print ++lineNumber, $2, $3, $5 }' \
  "${tmp}/${dir_name}_cl_${c}.txt" >> "${tmp}/${dir_name}_RG_${c_mod}_proc.csv"
获得这样的输出:
ID, POP, dG, LIG
1, 1, -2.3600, 80
2, 1, -2.0900, 80
3, 1, -1.9600, 80
4, 1, -1.9300, 80
5, 2, -1.5500, 80
6, 1, -1.0900, 80
7, 1, -0.7700, 80
8, 3, -0.7200, 80
9, 2, -0.6800, 80
10, 2, -0.6300, 80
11, 1, -0.5400, 80
12, 1, -0.5000, 80
13, 1, -0.3800, 80
14, 2, -0.3400, 80
15, 1, -0.3300, 80
16, 1, -0.2700, 80
17, 1, -0.0600, 80
18, 2, -0.0300, 80
19, 1, -0.0000, 80
20, 1, 0.0300, 80
21, 1, 0.0400, 80
22, 1, 0.0500, 80
23, 1, 0.0600, 80
如何修改此 gawk 表达式,对第 3 列设置过滤条件,只保留初始日志中第 3 列为负值的行?值得注意的是,由于各行已经按第 3 列的值排序,所以基本上每次只需要省略最后 N 行(即第 3 列为正值的行)。或者,我能否使用某个 sed 表达式(带 -i)直接编辑现有文件,删除这些行?
解决方案
请尝试以下方法。它仅基于您给出的示例,使用 GNU awk 编写并测试。
# Print a replacement header, then a running row number plus fields 2, 3
# and 5 for every row whose 3rd field is negative. The rows are expected to
# be sorted ascending on field 3, so reading stops at the first value that
# is not negative (a value of -0.0000 compares equal to 0 and also stops it).
awk -F',[[:space:]]*' -v OFS=", " '
FNR==1{
  print "ID, POP, dG, LIG"
  next
}
$3<0{
  print ++line,$2,$3,$5   # field 2 (POP), not the constant 2
}
$3>=0{ exit }' Input_file
说明:为上述添加详细说明。
awk -F',[[:space:]]*' -v OFS=", " '   ## Start of the awk program; FS is a comma plus any following whitespace, OFS is comma-space.
FNR==1{                                ## The first line of the file is the column header.
  print "ID, POP, dG, LIG"             ## Print the replacement header.
  next                                 ## Skip the remaining rules for this line.
}
$3<0{                                  ## Keep only rows whose 3rd field is negative.
  print ++line,$2,$3,$5                ## Print a running row number, then the 2nd, 3rd and 5th fields.
}
$3>=0{ exit }                          ## Rows are sorted on field 3, so stop at the first non-negative value.
' Input_file                           ## Input_file is the file to read.
所示样本的输出如下(仅列出前 5 行):
ID, POP, dG, LIG
1, 2, -2.3600, 80
2, 2, -2.0900, 80
3, 2, -1.9600, 80
4, 2, -1.9300, 80
5, 2, -1.5500, 80
推荐阅读
- c - VS Code 在从 GitHub 下载的 cs50.h 头文件中显示错误
- django - 404 - 在 Azure Web 服务中部署 Django 和 Vue js 集成项目时,静态文件找不到文件错误
- c - 在C中的链表中搜索值
- airflow - 如何在非气流运算符 python 函数中访问 Xcom 值
- r - 当轴标题中有上标时,如何调整绘图区域以使其与其他图形相等?
- netlogo - 连接一定半径内的海龟
- android - 通话时开启扬声器,Android 10及以上版本无法使用
- angular - 如何在角度 10 中使用异步管道检索数据后执行一些操作
- xamarin.forms - 如何在以 Xamarin 形式流式传输 rtsp 时拍摄快照
- ubuntu - 如何从 systemd 服务从同一台 Ubuntu 机器启动新的 XSession