using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;
using System;
using System.IO;
using System.Text;
namespace PdfContentReader
{
    /// <summary>
    /// Console program that extracts the text of a PDF file with iTextSharp
    /// and writes it to standard output.
    /// </summary>
    public static class Program
    {
        /// <summary>
        /// Extracts the text of every page of a PDF file and concatenates it.
        /// </summary>
        /// <param name="fileName">Path of the PDF file to read.</param>
        /// <returns>
        /// The text of all pages, in page order, appended with no separator
        /// between pages; an empty string when <see cref="File.Exists(string)"/>
        /// reports that <paramref name="fileName"/> does not exist.
        /// </returns>
        public static string ReadPdfFile(string fileName)
        {
            StringBuilder text = new StringBuilder();

            // A missing file yields empty text rather than an exception.
            if (!File.Exists(fileName))
            {
                return text.ToString();
            }

            PdfReader pdfReader = new PdfReader(fileName);
            try
            {
                // Pages are addressed from 1 through NumberOfPages.
                for (int page = 1; page <= pdfReader.NumberOfPages; page++)
                {
                    // Use a new strategy instance for each page.
                    ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
                    string currentText = PdfTextExtractor.GetTextFromPage(pdfReader, page, strategy);

                    // The extracted value is already a .NET string, so it is appended as-is.
                    // Round-tripping it through Encoding.Default would replace every character
                    // that the system code page cannot represent with '?'.
                    text.Append(currentText);
                }
            }
            finally
            {
                // Close the reader even when text extraction throws.
                pdfReader.Close();
            }

            return text.ToString();
        }

        /// <summary>
        /// Entry point: extracts the text of a PDF file and prints it to the console.
        /// </summary>
        /// <param name="args">
        /// Command-line arguments. When at least one is supplied, the first is used as
        /// the PDF path; otherwise the built-in placeholder path is used.
        /// </param>
        public static void Main(string[] args)
        {
            string fileName = (args != null && args.Length > 0)
                ? args[0]
                : @"path\to\file.pdf";

            string extractedText = ReadPdfFile(fileName);
            Console.WriteLine(extractedText);
        }
    }
}