c# - 将 XLS 文件转换为 CSV 但错误编号单元格
问题描述
以下代码允许您将文件从a 转换XLS
为 a CSV
:
static void ConvertExcelToCsv(string excelFile, string csvOutputFile, int worksheetNumber = 1)
{
//Checks if the two files required exist or not and then throws an exception.
if (!File.Exists(excelFile)) throw new FileNotFoundException(excelFile);
if (File.Exists(csvOutputFile))
{
File.Delete(csvOutputFile);
}
// connection string
var cnnStr = String.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data Source={0};Extended Properties=\"Excel 8.0;IMEX=1;HDR=NO\"", excelFile);
//Creates a new OleDbConnection with an argument of cnn
var cnn = new OleDbConnection(cnnStr);
//creates new datatable in memory to store the read excel spreadsheet
var dt = new DataTable();
try
{
//Opens the new connection called "cnn".
cnn.Open();
var schemaTable = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
if (schemaTable.Rows.Count < worksheetNumber) throw new ArgumentException("The worksheet number provided cannot be found in the spreadsheet");
string worksheet = schemaTable.Rows[worksheetNumber - 1]["table_name"].ToString().Replace("'", "");
Console.WriteLine("worksheet:" + worksheet);
string sql = String.Format("select * from [{0}]", worksheet).ToString();
//string sql = worksheet.ToString();
Console.WriteLine("sql:" + sql);
var da = new OleDbDataAdapter(sql, cnn);
da.Fill(dt);
}
catch (OleDbException e)
{
throw new ArgumentException(e.Message, "Error during the conversion");
}
finally
{
// free resources
cnn.Close();
}
// write out CSV data
using (var wtr = new StreamWriter(csvOutputFile))
{
foreach (DataRow row in dt.Rows)
{
bool firstLine = true;
foreach (DataColumn col in dt.Columns)
{
if (!firstLine)
{
wtr.Write("~");
}
else
{
firstLine = false;
}
var data = row[col.ColumnName].ToString().Replace("\"", "\"\"");
// wtr.Write(String.Format("{0}", data));
wtr.Write(data.ToString());
}
wtr.WriteLine();
}
}
}
它将文件转换XLS
为CSV
文件,但是当我有这种类型的单元格时:
以这种方式转换它:
如何更改代码以使其具有相同的值XLS
?
解决方案
我上面评论中建议的解决方案将使用Excel Interop Objects将 Excel 工作表导出为CSV
文件。
将其应用于您的案例:
using System;
using System.IO;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
//...
public static void ConvertExcelToCsv(
string excelFile,
string csvOutputFile,
int worksheetNumber = 1)
{
Excel.Application xlApp = null;
Excel.Workbook xlWorkBook = null;
try
{
if (!File.Exists(excelFile))
throw new FileNotFoundException(excelFile);
if (File.Exists(csvOutputFile))
File.Delete(csvOutputFile);
xlApp = new Excel.Application();
xlWorkBook = xlApp.Workbooks.Open(excelFile, Type.Missing, true);
var xlSheet = xlWorkBook.Worksheets[worksheetNumber] as Excel.Worksheet;
if (xlSheet is null)
throw new ArgumentException();
xlSheet.SaveAs(csvOutputFile, Excel.XlFileFormat.xlCSV);
}
catch (FileNotFoundException)
{
Console.WriteLine($"'{excelFile}' does not exist!");
}
catch (ArgumentException)
{
Console.WriteLine("The worksheet number provided does not exist.");
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
finally
{
xlWorkBook?.Close(false);
xlApp?.Quit();
if (xlWorkBook != null) Marshal.FinalReleaseComObject(xlWorkBook);
if (xlApp != null) Marshal.FinalReleaseComObject(xlApp);
xlWorkBook = null;
xlApp = null;
GC.Collect();
GC.WaitForPendingFinalizers();
}
}
但是TextInfo.ListSeparator将用作分隔符。如果您需要使用特定字符(~如分隔符),请使用相同的方法:
using System;
using System.IO;
using System.Linq;
using System.Globalization;
using System.Runtime.InteropServices;
using Excel = Microsoft.Office.Interop.Excel;
//...
public static void ConvertExcelToCsv(
string excelFile,
string csvOutputFile,
int worksheetNumber = 1,
string delimiter = null)
{
Excel.Application xlApp = null;
Excel.Workbook xlWorkBook = null;
try
{
if (!File.Exists(excelFile))
throw new FileNotFoundException(excelFile);
if (File.Exists(csvOutputFile))
File.Delete(csvOutputFile);
xlApp = new Excel.Application();
xlWorkBook = xlApp.Workbooks.Open(excelFile, Type.Missing, true);
var xlSheet = xlWorkBook.Worksheets[worksheetNumber] as Excel.Worksheet;
if (xlSheet is null)
throw new ArgumentException();
if (delimiter is null)
delimiter = CultureInfo.CurrentCulture.TextInfo.ListSeparator;
var xlRange = xlSheet.UsedRange;
using (var sw = new StreamWriter(csvOutputFile))
//Use:
//foreach (var r in xlRange.Rows.Cast<Excel.Range>().Skip(1))
//If the first row is a header row and you want to skip it...
foreach (Excel.Range row in xlRange.Rows)
sw.WriteLine(string.Join(delimiter, row.Cells.Cast<Excel.Range>()
.Select(x => x.Value2)));
}
catch (FileNotFoundException)
{
Console.WriteLine($"'{excelFile}' does not exist!");
}
catch (ArgumentException)
{
Console.WriteLine("The worksheet number provided does not exist.");
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
finally
{
xlWorkBook?.Close(false);
xlApp?.Quit();
if (xlWorkBook != null) Marshal.FinalReleaseComObject(xlWorkBook);
if (xlApp != null) Marshal.FinalReleaseComObject(xlApp);
xlWorkBook = null;
xlApp = null;
GC.Collect();
GC.WaitForPendingFinalizers();
}
}
请注意,单元格越多,处理时间就越长。但是,无论工作表是否具有混合类型的列,您都将按原样获取值。
此外,您可能想尝试OleDb
更快处理的方式。无需填充 DataTable 并再次遍历列和行以写入输出行,而是使用OleDbDataReader从工作表中获取每一行的值,通过分隔符连接和分隔它们,然后将字符串传递给SteamWriter.WriteLine方法:
using System;
using System.IO;
using System.Linq;
using System.Data.OleDb;
using System.Globalization;
//...
public static void ConvertExcelToCsv(
string excelFile,
string csvOutputFile,
int worksheetNumber = 1,
string delimiter = null)
{
try
{
if (!File.Exists(excelFile))
throw new FileNotFoundException(excelFile);
if (File.Exists(csvOutputFile))
File.Delete(csvOutputFile);
if (delimiter is null)
delimiter = CultureInfo.CurrentCulture.TextInfo.ListSeparator;
var cnnStr = $"Provider=Microsoft.Jet.OLEDB.4.0;Data Source={excelFile};" +
$"Extended Properties='Excel 8.0;HDR=Mo;IMEX=1;'";
using (var cnn = new OleDbConnection(cnnStr))
{
cnn.Open();
var schemaTable = cnn.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, null);
if (schemaTable.Rows.Count < worksheetNumber)
throw new ArgumentException();
var worksheet = schemaTable.Rows[worksheetNumber - 1]["table_name"]
.ToString().Replace("'", "");
using (var cmd = new OleDbCommand($"SELECT * FROM [{worksheet}]", cnn))
using (var r = cmd.ExecuteReader())
using (var sw = new StreamWriter(csvOutputFile))
while (r.Read())
{
var values = new object[r.FieldCount];
r.GetValues(values);
sw.WriteLine(string.Join(delimiter, values));
}
}
}
catch (FileNotFoundException)
{
Console.WriteLine($"'{excelFile}' does not exist!");
}
catch (ArgumentException)
{
Console.WriteLine("The worksheet number provided does not exist.");
}
catch (OleDbException ex)
{
Console.WriteLine(ex.Message);
}
catch (Exception ex)
{
Console.WriteLine(ex.Message);
}
}
呼叫者,召集者:
void TheCaller()
{
var xlFile = "XlsFile.xls";
var csvFile = Path.ChangeExtension(xlFile, "csv");
var delimiter = "~";
var sheetNumber = 1;
Console.WriteLine("Exporting...");
//ConvertExcelToCsv(xlFile, csvFile, sheetNumber); //To call the first code snippet.
ConvertExcelToCsv(xlFile, csvFile, sheetNumber, delimiter);
Console.WriteLine("Done...!");
}
或者您可能需要异步调用,尤其是前两个选项:
async void TheCaller()
{
var xlFile = "XlsFile.xls";
var csvFile = Path.ChangeExtension(xlFile, "csv");
var delimiter = "~";
var sheetNumber = 1;
Console.WriteLine("Exporting...");
await Task.Run(() => ConvertExcelToCsv(xlFile, csvFile, sheetNumber, delimiter));
Console.WriteLine("Done...!");
}
笔记
1. 要使用 Excel 互操作对象,请添加对Microsoft.Office.Interop.Excel
的引用2. 您可能需要检查一下。
推荐阅读
- java - 在 Apache Phoenix 中显示正确的 HBase 日期类型时出现问题
- java - 如何从同一组邮箱轮询电子邮件但针对不同的服务器实例(例如:Dev、QA、Staging)?
- python - python代码在本地运行良好但在AWS EC2(ubuntu)中运行良好
- android - 短信发送状态广播接收器未在 oreo 中运行
- javascript - RadioButtonFor 在加载页面后不调用 onchange
- substring - 从列表Python系列中获取子字符串
- mongodb - pymongo 查询以获取文档及其子文档
- python - 使用 python 打印最后 20 个 bash 历史记录并保存在 pdf 文件中
- elasticsearch - elasticsearch total_fields.limit 作为全局
- javascript - 即使在使用 parseInt() 方法后,从文本输入计算值时也得到 NaN 而不是数字