首页 > 解决方案 > 将 XLS 文件转换为 CSV 但错误编号单元格

问题描述

以下代码允许您将文件从a 转换XLS为 a CSV

static void ConvertExcelToCsv(string excelFile, string csvOutputFile, int worksheetNumber = 1)
{
    //Checks if the two files required exist or not and then throws an exception.
    if (!File.Exists(excelFile)) throw new FileNotFoundException(excelFile);
    if (File.Exists(csvOutputFile))
    {
        File.Delete(csvOutputFile);
    }

    // connection string
    var cnnStr = String.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data Source={0};Extended Properties=\"Excel 8.0;IMEX=1;HDR=NO\"", excelFile);
    //Creates a new OleDbConnection with an argument of cnn
    var cnn = new OleDbConnection(cnnStr);

    //creates new datatable in memory to store the read excel spreadsheet
    var dt = new DataTable();
    try
    {
        //Opens the new connection called "cnn".
        cnn.Open();
        var schemaTable = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
        if (schemaTable.Rows.Count < worksheetNumber) throw new ArgumentException("The worksheet number provided cannot be found in the spreadsheet");
        string worksheet = schemaTable.Rows[worksheetNumber - 1]["table_name"].ToString().Replace("'", "");
        Console.WriteLine("worksheet:" + worksheet);
        string sql = String.Format("select * from [{0}]", worksheet).ToString();
        //string sql = worksheet.ToString();
        Console.WriteLine("sql:" + sql);
        var da = new OleDbDataAdapter(sql, cnn);
        da.Fill(dt);
    }
    catch (OleDbException e)
    {
        throw new ArgumentException(e.Message, "Error during the conversion");

    }
    finally
    {
        // free resources
        cnn.Close();
    }

    // write out CSV data
    using (var wtr = new StreamWriter(csvOutputFile))
    {
        foreach (DataRow row in dt.Rows)
        {
            bool firstLine = true;
            foreach (DataColumn col in dt.Columns)
            {
                if (!firstLine)
                {
                    wtr.Write("~");
                }
                else
                {
                    firstLine = false;
                }
                var data = row[col.ColumnName].ToString().Replace("\"", "\"\"");
                // wtr.Write(String.Format("{0}", data));
                wtr.Write(data.ToString());
            }
            wtr.WriteLine();
        }
    }
}

它将文件转换XLSCSV文件,但是当我有这种类型的单元格时:

这种类型的细胞

以这种方式转换它:

以他的方式转换它

如何更改代码以使其具有相同的值XLS

标签: c#csvxls

解决方案


我上面评论中建议的解决方案将使用Excel Interop Objects将 Excel 工作表导出为CSV文件。

将其应用于您的案例:

using System;
using System.IO;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
//...

public static void ConvertExcelToCsv(
    string excelFile,
    string csvOutputFile,
    int worksheetNumber = 1)
{
    Excel.Application xlApp = null;
    Excel.Workbook xlWorkBook = null;

    try
    {
        if (!File.Exists(excelFile))
            throw new FileNotFoundException(excelFile);

        if (File.Exists(csvOutputFile))
            File.Delete(csvOutputFile);

        xlApp = new Excel.Application();
        xlWorkBook = xlApp.Workbooks.Open(excelFile, Type.Missing, true);
        var xlSheet = xlWorkBook.Worksheets[worksheetNumber] as Excel.Worksheet;

        if (xlSheet is null)
            throw new ArgumentException();

        xlSheet.SaveAs(csvOutputFile, Excel.XlFileFormat.xlCSV);
    }
    catch (FileNotFoundException)
    {
        Console.WriteLine($"'{excelFile}' does not exist!");
    }
    catch (ArgumentException)
    {
        Console.WriteLine("The worksheet number provided does not exist.");
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    finally
    {
        xlWorkBook?.Close(false);
        xlApp?.Quit();

        if (xlWorkBook != null) Marshal.FinalReleaseComObject(xlWorkBook);
        if (xlApp != null) Marshal.FinalReleaseComObject(xlApp);

        xlWorkBook = null;
        xlApp = null;

        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}

但是TextInfo.ListSeparator将用作分隔符。如果您需要使用特定字符(~如分隔符),请使用相同的方法:

using System;
using System.IO;
using System.Linq;
using System.Globalization;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
//...

public static void ConvertExcelToCsv(
    string excelFile,
    string csvOutputFile,
    int worksheetNumber = 1,
    string delimiter = null)
{
    Excel.Application xlApp = null;
    Excel.Workbook xlWorkBook = null;

    try
    {
        if (!File.Exists(excelFile))
            throw new FileNotFoundException(excelFile);

        if (File.Exists(csvOutputFile))
            File.Delete(csvOutputFile);

        xlApp = new Excel.Application();
        xlWorkBook = xlApp.Workbooks.Open(excelFile, Type.Missing, true);
        var xlSheet = xlWorkBook.Worksheets[worksheetNumber] as Excel.Worksheet;                

        if (xlSheet is null)
            throw new ArgumentException();

        if (delimiter is null)
            delimiter = CultureInfo.CurrentCulture.TextInfo.ListSeparator;

        var xlRange = xlSheet.UsedRange;

        using (var sw = new StreamWriter(csvOutputFile))
            //Use:
            //foreach (var r in xlRange.Rows.Cast<Excel.Range>().Skip(1))
            //If the first row is a header row and you want to skip it...
            foreach (Excel.Range row in xlRange.Rows)
                sw.WriteLine(string.Join(delimiter, row.Cells.Cast<Excel.Range>()
                    .Select(x => x.Value2)));

    }
    catch (FileNotFoundException)
    {
        Console.WriteLine($"'{excelFile}' does not exist!");
    }
    catch (ArgumentException)
    {
        Console.WriteLine("The worksheet number provided does not exist.");
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
    finally
    {
        xlWorkBook?.Close(false);
        xlApp?.Quit();

        if (xlWorkBook != null) Marshal.FinalReleaseComObject(xlWorkBook);
        if (xlApp != null) Marshal.FinalReleaseComObject(xlApp);

        xlWorkBook = null;
        xlApp = null;

        GC.Collect();
        GC.WaitForPendingFinalizers();
    }
}

请注意,单元格越多,处理时间就越长。但是,无论工作表是否具有混合类型的列,您都将按原样获取值。

此外,您可能想尝试OleDb更快处理的方式。无需填充 DataTable 并再次遍历列和行以写入输出行,而是使用OleDbDataReader从工作表中获取每一行的值,通过分隔符连接和分隔它们,然后将字符串传递给SteamWriter.WriteLine方法:

using System;
using System.IO;
using System.Linq;
using System.Data.OleDb;
using System.Globalization;
//...

public static void ConvertExcelToCsv(
    string excelFile,
    string csvOutputFile,
    int worksheetNumber = 1,
    string delimiter = null)
{
    try
    {
        if (!File.Exists(excelFile))
            throw new FileNotFoundException(excelFile);

        if (File.Exists(csvOutputFile))
            File.Delete(csvOutputFile);

        if (delimiter is null)
            delimiter = CultureInfo.CurrentCulture.TextInfo.ListSeparator;

        var cnnStr = $"Provider=Microsoft.Jet.OLEDB.4.0;Data Source={excelFile};" +
            $"Extended Properties='Excel 8.0;HDR=Mo;IMEX=1;'";

        using (var cnn = new OleDbConnection(cnnStr))
        {
            cnn.Open();

            var schemaTable = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);

            if (schemaTable.Rows.Count < worksheetNumber)
                throw new ArgumentException();

            var worksheet = schemaTable.Rows[worksheetNumber - 1]["table_name"]
                .ToString().Replace("'", "");

            using (var cmd = new OleDbCommand($"SELECT * FROM [{worksheet}]", cnn))
            using (var r = cmd.ExecuteReader())
            using (var sw = new StreamWriter(csvOutputFile))
                while (r.Read())
                {
                    var values = new object[r.FieldCount];
                    r.GetValues(values);
                    sw.WriteLine(string.Join(delimiter, values));
                }
        }
    }
    catch (FileNotFoundException)
    {
        Console.WriteLine($"'{excelFile}' does not exist!");
    }
    catch (ArgumentException)
    {
        Console.WriteLine("The worksheet number provided does not exist.");
    }
    catch (OleDbException ex)
    {
        Console.WriteLine(ex.Message);
    }
    catch (Exception ex)
    {
        Console.WriteLine(ex.Message);
    }
}

呼叫者,召集者:

void TheCaller()
{
    var xlFile = "XlsFile.xls";
    var csvFile = Path.ChangeExtension(xlFile, "csv");
    var delimiter = "~";
    var sheetNumber = 1;

    Console.WriteLine("Exporting...");
    //ConvertExcelToCsv(xlFile, csvFile, sheetNumber); //To call the first code snippet.
    ConvertExcelToCsv(xlFile, csvFile, sheetNumber, delimiter);
    Console.WriteLine("Done...!");
}

或者您可能需要异步调用,尤其是前两个选项:

async void TheCaller()
{
    var xlFile = "XlsFile.xls";
    var csvFile = Path.ChangeExtension(xlFile, "csv");
    var delimiter = "~";
    var sheetNumber = 1;

    Console.WriteLine("Exporting...");
    await Task.Run(() => ConvertExcelToCsv(xlFile, csvFile, sheetNumber, delimiter));
    Console.WriteLine("Done...!");
}

笔记

1. 要使用 Excel 互操作对象,请添加对Microsoft.Office.Interop.Excel
的引用2. 您可能需要检查一下


推荐阅读