using System;
using System.IO;
using System.Windows.Forms;
using Aspose.Pdf;
using Aspose.Pdf.Text;
using OfficeOpenXml;
namespace PDFTableExtractor
{
public partial class MainForm : Form
{
private string pdfFilePath = string.Empty;
private System.Data.DataTable extractedTable = null;
/// <summary>
/// Creates the form: builds the designer-generated controls, then selects the
/// EPPlus license context used by this application.
/// </summary>
public MainForm()
{
    InitializeComponent();

    // Set the EPPlus license context. The non-commercial context is chosen here;
    // NOTE(review): confirm this matches how the application is actually distributed.
    ExcelPackage.LicenseContext = OfficeOpenXml.LicenseContext.NonCommercial;
}
/// <summary>
/// Click handler for the "upload PDF" button. Prompts for a PDF file, extracts its
/// tables into <see cref="extractedTable"/>, and enables the export button only when
/// at least one row was extracted.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnUploadPDF_Click(object sender, EventArgs e)
{
    using (OpenFileDialog openFileDialog = new OpenFileDialog())
    {
        openFileDialog.Filter = "PDF Files|*.pdf";
        openFileDialog.Title = "選擇 PDF 文件";

        // Nothing to do when the user cancels the dialog.
        if (openFileDialog.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        pdfFilePath = openFileDialog.FileName;

        // Drop the previous result before extracting. Otherwise a failed extraction
        // would leave the old table (from a different PDF) in place with the export
        // button still enabled, and the user could export stale data.
        extractedTable = null;
        btnExportExcel.Enabled = false;

        // NOTE(review): several message strings below contain sequences such as
        // "(shù)" that look like text-conversion artifacts; they are left untouched
        // here - confirm the intended wording.
        try
        {
            extractedTable = ExtractTablesFromPDF(pdfFilePath);
            if (extractedTable != null && extractedTable.Rows.Count > 0)
            {
                MessageBox.Show($"成功從 PDF 提取 {extractedTable.Rows.Count} 行數(shù)據(jù)!", "提示", MessageBoxButtons.OK, MessageBoxIcon.Information);
                btnExportExcel.Enabled = true;
            }
            else
            {
                // Export stays disabled (set above) when no rows were found.
                MessageBox.Show("未從 PDF 中提取到表格數(shù)據(jù)。", "提示", MessageBoxButtons.OK, MessageBoxIcon.Warning);
            }
        }
        catch (Exception ex)
        {
            // Export stays disabled and extractedTable stays null after a failure.
            MessageBox.Show($"提取表格時發(fā)生錯誤:{ex.Message}", "錯誤", MessageBoxButtons.OK, MessageBoxIcon.Error);
        }
    }
}
/// <summary>
/// Extracts every table found on every page of a PDF into a single
/// <see cref="System.Data.DataTable"/>, one data row per absorbed table row.
/// </summary>
/// <param name="filePath">Path of the PDF file to read.</param>
/// <returns>
/// A table named "PDFTable" with auto-named columns. It is never null; it has no
/// rows when the document contains no tables. Cells a row does not supply are left
/// at their default (DBNull) value.
/// </returns>
private System.Data.DataTable ExtractTablesFromPDF(string filePath)
{
    System.Data.DataTable dataTable = new System.Data.DataTable("PDFTable");

    // Dispose the document so the PDF file is released as soon as extraction ends.
    // All cell text is copied into strings before the document is disposed.
    using (Document pdfDocument = new Document(filePath))
    {
        foreach (Page page in pdfDocument.Pages)
        {
            // A fresh absorber per page guarantees TableList holds only the tables
            // of the page just visited, so no table can be processed twice.
            TableAbsorber absorber = new TableAbsorber();
            absorber.Visit(page);

            foreach (AbsorbedTable table in absorber.TableList)
            {
                foreach (AbsorbedRow row in table.RowList)
                {
                    AppendAbsorbedRow(dataTable, row);
                }
            }
        }
    }

    return dataTable;
}

/// <summary>
/// Appends one absorbed PDF row to <paramref name="dataTable"/>, adding columns
/// first when the row has more cells than the table currently has columns.
/// </summary>
private static void AppendAbsorbedRow(System.Data.DataTable dataTable, AbsorbedRow row)
{
    System.Collections.Generic.List<string> cellTexts = new System.Collections.Generic.List<string>();
    foreach (AbsorbedCell cell in row.CellList)
    {
        cellTexts.Add(ReadCellText(cell));
    }

    // Tables in one document may differ in width; sizing the columns from the first
    // row only would make a wider later row throw IndexOutOfRangeException.
    while (dataTable.Columns.Count < cellTexts.Count)
    {
        dataTable.Columns.Add();
    }

    System.Data.DataRow dataRow = dataTable.NewRow();
    for (int colIndex = 0; colIndex < cellTexts.Count; colIndex++)
    {
        dataRow[colIndex] = cellTexts[colIndex];
    }

    dataTable.Rows.Add(dataRow);
}

/// <summary>
/// Concatenates the text of all fragments in a cell and trims surrounding whitespace.
/// </summary>
private static string ReadCellText(AbsorbedCell cell)
{
    System.Text.StringBuilder builder = new System.Text.StringBuilder();
    foreach (TextFragment textFragment in cell.TextFragments)
    {
        builder.Append(textFragment.Text);
    }

    return builder.ToString().Trim();
}
/// <summary>
/// Click handler for the "export to Excel" button. Warns when there is no extracted
/// data; otherwise asks for a destination via the save dialog and writes the
/// extracted table to that file, reporting success or failure in a message box.
/// </summary>
/// <param name="sender">Event source (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnExportExcel_Click(object sender, EventArgs e)
{
    bool hasRows = extractedTable != null && extractedTable.Rows.Count != 0;
    if (!hasRows)
    {
        MessageBox.Show("沒有數(shù)據(jù)可導(dǎo)出。", "提示", MessageBoxButtons.OK, MessageBoxIcon.Warning);
        return;
    }

    // Suggest a timestamped file name so repeated exports do not collide by default.
    saveFileDialog1.FileName = $"PDF_Extracted_Table_{DateTime.Now:yyyyMMddHHmmss}.xlsx";

    // Leave quietly when the user dismisses the save dialog.
    if (saveFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    try
    {
        SaveTableToExcel(extractedTable, saveFileDialog1.FileName);
        MessageBox.Show($"數(shù)據(jù)已成功導(dǎo)出到:{saveFileDialog1.FileName}", "成功", MessageBoxButtons.OK, MessageBoxIcon.Information);
    }
    catch (Exception ex)
    {
        MessageBox.Show($"導(dǎo)出 Excel 時發(fā)生錯誤:{ex.Message}", "錯誤", MessageBoxButtons.OK, MessageBoxIcon.Error);
    }
}
/// <summary>
/// Writes a <see cref="System.Data.DataTable"/> to a new .xlsx workbook: one bold
/// header row with the column names, followed by one worksheet row per data row.
/// </summary>
/// <param name="dataTable">Table whose columns and rows are written.</param>
/// <param name="filePath">Destination file; an existing file is replaced.</param>
private void SaveTableToExcel(System.Data.DataTable dataTable, string filePath)
{
    // Build the workbook in memory and write it with SaveAs. Constructing the
    // package from the destination FileInfo would open an existing workbook, and
    // adding the worksheet would then fail when overwriting a previous export
    // because a sheet with the same name is already present.
    using (ExcelPackage excelPackage = new ExcelPackage())
    {
        // NOTE(review): the sheet name contains "(shù)"/"(jù)", which looks like a
        // text-conversion artifact; left untouched - confirm the intended name.
        ExcelWorksheet worksheet = excelPackage.Workbook.Worksheets.Add("提取的數(shù)據(jù)");

        // Header row (worksheet rows and columns are 1-based).
        for (int i = 0; i < dataTable.Columns.Count; i++)
        {
            worksheet.Cells[1, i + 1].Value = dataTable.Columns[i].ColumnName;
            worksheet.Cells[1, i + 1].Style.Font.Bold = true;
        }

        // Data rows start at worksheet row 2, directly below the header.
        for (int row = 0; row < dataTable.Rows.Count; row++)
        {
            for (int col = 0; col < dataTable.Columns.Count; col++)
            {
                object value = dataTable.Rows[row][col];

                // Unset DataTable cells hold DBNull; write them as empty cells.
                worksheet.Cells[row + 2, col + 1].Value = value is DBNull ? null : value;
            }
        }

        // Dimension is null while the sheet has no cells (e.g. a table with no
        // columns), so only auto-fit when there is something to measure.
        if (worksheet.Dimension != null)
        {
            worksheet.Cells[worksheet.Dimension.Address].AutoFitColumns();
        }

        excelPackage.SaveAs(new FileInfo(filePath));
    }
}
}
}