首页 > 解决方案 > Go 没有将完整的数据写入文本文件

问题描述

我正在尝试探索 Go 并发性。这里Grabber()打印并写入执行结果。程序打印预期结果,但不将其写入urls.txt. 谁能向我解释我在这里缺少什么?

main.go

package main
import (
    "bufio"
    "fmt"
    "io/ioutil"
    "log"
    "net/http"
    "os"
    "regexp"
    "strings"
    "sync"
)

var wg sync.WaitGroup
var mt sync.Mutex

// Final Literation
func main() {
    file, err := os.Open("ip.txt")
    if err != nil {
        log.Fatal(err)
    }
    defer file.Close()
    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        go Grabber(scanner.Text())
        wg.Add(1)

    }
    wg.Wait()

    if err := scanner.Err(); err != nil {
        log.Fatal(err)
    }
}

// stringInArray do If string in list return true false otherwise.
func stringInArray(a string, list []string) bool {
    for _, b := range list {
        if b == a {
            return true
        }
    }
    return false
}

// Grabber Do Search the bing and collect array of sitelist
func Grabber(ip string) {
    defer wg.Done()
    var output []string
    outfile, err := os.Create("urls.txt")
    if err != nil {
        log.Fatal(err)
    }
    defer outfile.Close()
    if ip == "" {

    }
    page := 1
    for page < 251 {
        client := &http.Client{}
        req, err := http.NewRequest(
            http.MethodGet,
            fmt.Sprintf(
                "http://www.bing.com/search?q=ip:%s+&count=50&first=1",
                ip,
            ),
            nil,
        )
        if err != nil {

        }
        req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 6.1; rv:57.0) Gecko/20100101 Firefox/57.0")
        res, err := client.Do(req)
        if err != nil {
            fmt.Println("Invalid Request")
        }
        defer res.Body.Close()
        body, err := ioutil.ReadAll(res.Body)
        if err != nil {
            fmt.Println("Couldn't Read")
        }
        re := regexp.MustCompile(`<h2><a href="(.*?)"`)
        links := re.FindAllString(string(body), -1)
        if links != nil {
            for l := range links {
                o := strings.Split(links[l], `"`)
                d := strings.Split(o[1], "/")
                s := d[0] + "//" + d[2]
                if !stringInArray(s, output) {
                    output = append(output, s)
                }
            }
        }
        page = page + 50
    }
    for _, links := range output {
        fmt.Println(links)
        fmt.Fprintln(outfile, links)
    }
}

ip.txt 作为输入

103.253.145.129
103.253.146.125
103.253.146.239
103.253.147.72
146.185.176.79
146.185.176.45
146.185.179.250
146.185.180.35
146.185.180.185
146.185.180.113
146.185.181.51
146.185.183.107
146.185.183.202
146.185.183.248
146.185.183.219
146.185.184.69
146.185.185.169

git repo URLGrabber

标签: go

解决方案


您正在调用create每个 goroutine,这将截断文件。相反,在外部创建文件,并使用另一个 goroutine 序列化对其的写入:

    outfile, err := os.Create("urls.txt")
    results:=make(chan []string)
    go func() {
       for output:=range results { 
          for _, links := range output {
            fmt.Fprintln(outfile, links)
          }      
       }
    }()

    scanner := bufio.NewScanner(file)
    for scanner.Scan() {
        go Grabber(scanner.Text(), results)
        wg.Add(1)

    }
    wg.Wait()
    close(results)

在 Grabber 中获取结果时,不要将其写入文件,而是将其写入通道:

    results<-output
    for _, links := range output {
        fmt.Println(links)
    }

推荐阅读