首页 > 解决方案 > 如何在 bash 中根据模式文本将文件中的文本对齐成表格的样子?

问题描述

我有以下文字

'   14411.7647 e0       - 2647.0588 e3       + 7352.9412 e12      + 14411.7647 e123       21828.2063'
' - 2647.0588 e3       + 7352.9412 e12        7814.9002'
'   14411.7647 e0       + 14411.7647 e123       20381.3131'
'   14411.7647 e0       + 14411.7647 e123       20381.3131'
'   0.0000 e0       + 0.0000 e123       1.9293e-12'
'   14411.7647'

我想按 eXXX 项对这些文本进行对齐,使其看起来像一张表格。下面是一个可能的输出示例:

' 14411.7647 e0     - 2647.0588 e3      + 7352.9412 e12     + 14411.7647 e123   21828.2063'                 
'                   - 2647.0588 e3      + 7352.9412 e12                          7814.9002'                 
' 14411.7647 e0                                             + 14411.7647 e123   20381.3131'                 
' 14411.7647 e0                                             + 14411.7647 e123   20381.3131'                 
'     0.0000 e0                                                 + 0.0000 e123   1.9293e-12'                 
'                                                                               14411.7647'                                                                                                                                                                         

最重要的部分是将 eXXX 项与其系数对齐。

更新:列最初由空格分隔。例如,输出可以用制表符分隔。

UPDATE2:第一行决定了总列数,其他行的列数不会多于第一行。第二行及后续行中的 eXXX 项可以与第一行相同,也可以不同,但其项数永远不会多于第一行,顺序也不会被打乱(即 e12 总是在 e3 之后)。

这可以使用 awk 或类似方法来实现吗?

标签: bash awk sed grep

解决方案


$ cat tst.awk
# tst.awk -- line up "coefficient eXXX" terms into columns.
#
# The first input line defines the column layout: its fields are read as
# (value, key) pairs such as "14411.7647 e0", and the order of the keys
# becomes the order of the output columns. A trailing value that has no
# key after it is stored under the empty key. Every line (the first one
# included) is then printed with each value placed in the column of its
# key, columns separated by OFS (a tab) and the row wrapped in quotes.
BEGIN { OFS = "\t" }

# Normalize every record before it is inspected.
{
    # Drop a single quote at the very start and/or very end of the line.
    gsub(/^\047|\047$/, "")

    # Glue a detached sign onto the number that follows it so that each
    # signed coefficient is exactly one whitespace-separated field.
    gsub(/\+ /, "+")
    gsub(/- /, "-")
}

# The first record fixes the column order: remember every second field
# (the keys) in the order they appear. When NF is odd the last position
# is NF+1, which reads as the empty string and acts as the key for the
# trailing, unlabeled value.
NR == 1 {
    for (pos = 2; pos <= NF + 1; pos += 2)
        colKey[++nCols] = $pos
}

# Lay out each record according to the remembered column order.
{
    # Start from an empty key -> value map for this record.
    split("", valueOf)
    for (pos = 1; pos <= NF; pos += 2)
        valueOf[$(pos + 1)] = $pos

    # Build the row one column at a time; columns whose key is absent
    # from this record stay empty.
    row = ""
    for (col = 1; col <= nCols; col++) {
        label = colKey[col]
        cell = ""
        if (label in valueOf) {
            cell = valueOf[label] ""
            if (label ~ /./)
                cell = cell " " label
            # Put the blank back between a leading sign and its number.
            sub(/^[-+]/, "& ", cell)
        }
        row = row (col > 1 ? OFS : "") cell
    }
    print "\047 " row "\047"
}

制表符分隔的输出:

$ awk -f tst.awk file
' 14411.7647 e0 - 2647.0588 e3  + 7352.9412 e12 + 14411.7647 e123       21828.2063'
'       - 2647.0588 e3  + 7352.9412 e12         7814.9002'
' 14411.7647 e0                 + 14411.7647 e123       20381.3131'
' 14411.7647 e0                 + 14411.7647 e123       20381.3131'
' 0.0000 e0                     + 0.0000 e123   1.9293e-12'
'                               14411.7647'

以表格形式显示的输出(也可以在脚本中对每个字段使用适当宽度的 printf):

$ awk -f tst.awk file | column -s$'\t' -t
' 14411.7647 e0  - 2647.0588 e3  + 7352.9412 e12  + 14411.7647 e123  21828.2063'
'                - 2647.0588 e3  + 7352.9412 e12                     7814.9002'
' 14411.7647 e0                                   + 14411.7647 e123  20381.3131'
' 14411.7647 e0                                   + 14411.7647 e123  20381.3131'
' 0.0000 e0                                       + 0.0000 e123      1.9293e-12'
'                                                                    14411.7647'

推荐阅读