Reader Level:
Article
C#

Convert HTML to Word Then Word to PDF With C#

By Lizzy Landy on Jul 14 2011
This method does not convert HTML to PDF directly, but instead first converts HTML to Word then converts Word to PDF.
    • Like
    • Love It
    • Awesome
    • Interesting
    • It's Okay
    • Thumbs Down
  • 52.5k
  • 0

After working out a method for converting Office documents to PDF, I researched ways to convert an HTML file to PDF and collected a lot of material. I now have one method that I want to share with you.

This method does not convert HTML to PDF directly, but instead first converts HTML to Word then converts Word to PDF.

Using the Code:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using Word = Microsoft.Office.Interop.Word;
using oWord = Microsoft.Office.Interop.Word;
using System.Reflection;

using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using Microsoft.Office.Core;
using System.Text.RegularExpressions;

namespace WindowsApplication2
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form and runs <c>InitializeComponent</c>.
/// </summary>
public Form1()
{
// NOTE(review): InitializeComponent is not defined in this part of the partial class;
// presumably it is the designer-generated method that builds the controls - confirm.
InitializeComponent();
}
/// <summary>
/// Starts a visible Word instance, builds a demo document in it (headings, two tables,
/// filler text up to a page break, and an MSGraph chart), then closes this form.
/// Word itself is not quit by this method, so the new document stays open.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button1_Click(object sender, EventArgs e)
{
// Placeholder passed (by ref) for every optional COM argument that is left unspecified.
object oMissing = System.Reflection.Missing.Value;
object oEndOfDoc = "\\endofdoc"; /* \endofdoc is a predefined bookmark */

//Start Word, make it visible, and create a new document.

Word._Application oWord;
Word._Document oDoc;
oWord = new Word.Application();
oWord.Visible = true;
oDoc = oWord.Documents.Add(ref oMissing, ref oMissing,
ref oMissing, ref oMissing);

//Insert a bold paragraph at the beginning of the document.
Word.Paragraph oPara1;
oPara1 = oDoc.Content.Paragraphs.Add(ref oMissing);
oPara1.Range.Text = "Heading 1";
oPara1.Range.Font.Bold = 1;
oPara1.Format.SpaceAfter = 24; //24 pt spacing after paragraph.
oPara1.Range.InsertParagraphAfter();
//Insert a paragraph at the end of the document (located via the \endofdoc bookmark).
Word.Paragraph oPara2;
object oRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oPara2 = oDoc.Content.Paragraphs.Add(ref oRng);
oPara2.Range.Text = "Heading 2";
oPara2.Format.SpaceAfter = 6;
oPara2.Range.InsertParagraphAfter();

//Insert another paragraph, this time with bold switched off.
Word.Paragraph oPara3;
oRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oPara3 = oDoc.Content.Paragraphs.Add(ref oRng);
oPara3.Range.Text = "This is a sentence of normal text. Now here is a table:";
oPara3.Range.Font.Bold = 0;
oPara3.Format.SpaceAfter = 24;
oPara3.Range.InsertParagraphAfter();

//Insert a 3 x 5 table (3 rows, 5 columns), fill it with data, and make the first row bold and italic.
Word.Table oTable;
Word.Range wrdRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oTable = oDoc.Tables.Add(wrdRng, 3, 5, ref oMissing, ref oMissing);
oTable.Range.ParagraphFormat.SpaceAfter = 6;
int r, c;
string strText;
// Cell indices are 1-based; each cell gets a label such as "r2c4".
for (r = 1; r <= 3; r++)
for (c = 1; c <= 5; c++)
{
strText = "r" + r + "c" + c;
oTable.Cell(r, c).Range.Text = strText;
}
oTable.Rows[1].Range.Font.Bold = 1;
oTable.Rows[1].Range.Font.Italic = 1;

//Add some text after the table.
Word.Paragraph oPara4;
oRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oPara4 = oDoc.Content.Paragraphs.Add(ref oRng);
oPara4.Range.InsertParagraphBefore();
oPara4.Range.Text = "And here's another table:";
oPara4.Format.SpaceAfter = 24;
oPara4.Range.InsertParagraphAfter();

//Insert a 5 x 2 table (5 rows, 2 columns), fill it with data, and change the column widths.
wrdRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oTable = oDoc.Tables.Add(wrdRng, 5, 2, ref oMissing, ref oMissing);
oTable.Range.ParagraphFormat.SpaceAfter = 6;
for (r = 1; r <= 5; r++)
for (c = 1; c <= 2; c++)
{
strText = "r" + r + "c" + c;
oTable.Cell(r, c).Range.Text = strText;
}
oTable.Columns[1].Width = oWord.InchesToPoints(2); //Change width of columns 1 & 2
oTable.Columns[2].Width = oWord.InchesToPoints(3);

//Keep inserting text. When you get to 7 inches from top of the document, insert a hard page break.
object oPos;
double dPos = oWord.InchesToPoints(7);
oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range.InsertParagraphAfter();
do
{
wrdRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
wrdRng.ParagraphFormat.SpaceAfter = 6;
wrdRng.InsertAfter("A line of text");
wrdRng.InsertParagraphAfter();
// Vertical position of the range on the page, compared below against the 7-inch limit (in points).
oPos = wrdRng.get_Information
(Word.WdInformation.wdVerticalPositionRelativeToPage);
}
while (dPos >= Convert.ToDouble(oPos));
// Collapse to the end of the range so the break and the following text go after the filler lines.
object oCollapseEnd = Word.WdCollapseDirection.wdCollapseEnd;
object oPageBreak = Word.WdBreakType.wdPageBreak;
wrdRng.Collapse(ref oCollapseEnd);
wrdRng.InsertBreak(ref oPageBreak);
wrdRng.Collapse(ref oCollapseEnd);
wrdRng.InsertAfter("We're now on page 2. Here's my chart:");
wrdRng.InsertParagraphAfter();

//Insert a chart as an embedded OLE object of class "MSGraph.Chart.8".
Word.InlineShape oShape;
object oClassType = "MSGraph.Chart.8";
wrdRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oShape = wrdRng.InlineShapes.AddOLEObject(ref oClassType, ref oMissing,
ref oMissing, ref oMissing, ref oMissing,
ref oMissing, ref oMissing, ref oMissing);

//Demonstrate use of late bound oChart and oChartApp objects to manipulate the chart object with MSGraph.
//Both are plain objects, so every member is reached through Type.InvokeMember.
object oChart;
object oChartApp;
oChart = oShape.OLEFormat.Object;
oChartApp = oChart.GetType().InvokeMember("Application",
BindingFlags.GetProperty, null, oChart, null);

//Change the chart type to Line.
object[] Parameters = new Object[1];
Parameters[0] = 4; //xlLine = 4
oChart.GetType().InvokeMember("ChartType", BindingFlags.SetProperty,
null, oChart, Parameters);

//Update the chart image and quit MSGraph.
oChartApp.GetType().InvokeMember("Update",
BindingFlags.InvokeMethod, null, oChartApp, null);
oChartApp.GetType().InvokeMember("Quit",
BindingFlags.InvokeMethod, null, oChartApp, null);
//... If desired, you can proceed from here using the Microsoft Graph Object model on the oChart and oChartApp objects to make additional changes to the chart.

//Set the width and height of the chart (inches converted to points).

oShape.Width = oWord.InchesToPoints(6.25f);
oShape.Height = oWord.InchesToPoints(3.57f);

//Add text after the chart.

wrdRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
wrdRng.InsertParagraphAfter();
wrdRng.InsertAfter("THE END.");

//Close this form. Word is not quit here, so the document stays open in the visible Word window.
this.Close();
}

/// <summary>
/// Lets the user pick a file (for example an HTML page), opens it in an invisible Word
/// instance and saves it in Word document format as <c>d:\Reports\aaa.doc</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
/// <remarks>
/// Word is always asked to quit, even when opening or saving fails, so a failed
/// conversion does not leave a hidden WINWORD.EXE process behind. Any exception raised
/// by Word still propagates to the caller.
/// </remarks>
private void button2_Click(object sender, EventArgs e)
{
    // Nothing to do when the user cancels the dialog.
    if (openFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    object fileName = openFileDialog1.FileName;
    object saveFileName = "d:\\Reports\\aaa.doc";

    // Start Word. Members are called through Type.InvokeMember so that only the leading
    // arguments have to be supplied instead of every optional COM parameter.
    Word.ApplicationClass word = new Word.ApplicationClass();
    Type wordType = word.GetType();
    try
    {
        // Open the chosen file; the two 'false' values are the 2nd and 3rd arguments of Documents.Open.
        Word.Documents docs = word.Documents;
        Type docsType = docs.GetType();
        Word.Document doc = (Word.Document)docsType.InvokeMember("Open",
            System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { fileName, false, false });

        // Convert and save. Other WdSaveFormat values that can be used here:
        //   wdFormatHTML, wdFormatDocument, wdFormatDOSText, wdFormatDOSTextLineBreaks,
        //   wdFormatEncodedText, wdFormatRTF, wdFormatTemplate, wdFormatText,
        //   wdFormatTextLineBreaks, wdFormatUnicodeText
        Type docType = doc.GetType();
        docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
            null, doc, new object[] { saveFileName, Word.WdSaveFormat.wdFormatDocument });
    }
    finally
    {
        // Quit Word on both the success and the failure path.
        wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod,
            null, word, null);
    }
}

/// <summary>
/// Converts a document to PDF in two steps: Word prints it to the PostScript file
/// <c>c:\aaa.ps</c>, then Acrobat Distiller turns that file into <c>c:\aaa.pdf</c>.
/// </summary>
/// <param name="s">Path of the document to open in Word.</param>
/// <remarks>
/// Word is always asked to quit, even if opening or printing fails; such a failure still
/// propagates to the caller. Distiller failures are best-effort: they are written to the
/// trace output instead of being thrown. Afterwards every running "acrodist" process is
/// stopped, because a lingering Distiller causes errors when this method is called again.
/// </remarks>
private void WordConvert(string s)
{
    object printFileName = @"c:\aaa.ps";
    object pdfFileName = "c:\\aaa.pdf";
    object jobOptions = "";

    // Script equivalent:
    //   var word = new ActiveXObject("Word.Application");
    //   var doc = word.Documents.Open(docfile);
    oWord.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
    Type wordType = word.GetType();
    try
    {
        oWord.Documents docs = word.Documents;
        Type docsType = docs.GetType();
        object objDocName = s;
        oWord.Document doc = (oWord.Document)docsType.InvokeMember("Open", System.Reflection.BindingFlags.InvokeMethod, null, docs, new Object[] { objDocName, true, true });

        // Print to the PostScript file. PrintOut has many optional parameters; calling it
        // through Type.InvokeMember lets us pass only the first four. This corresponds to
        // the script call doc.PrintOut(false, false, 0, psfile).
        Type docType = doc.GetType();
        docType.InvokeMember("PrintOut", System.Reflection.BindingFlags.InvokeMethod, null, doc, new object[] { false, false, oWord.WdPrintOutRange.wdPrintAllDocument, printFileName });
    }
    finally
    {
        // Script equivalent: word.Quit(). Runs on failure too so Word is not left running.
        wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
    }

    // Convert the PostScript file to PDF. Script equivalent: PDF.FileToPDF(psfile, pdffile, "").
    try
    {
        ACRODISTXLib.PdfDistillerClass pdf = new ACRODISTXLib.PdfDistillerClass();
        Type pdfType = pdf.GetType();
        pdfType.InvokeMember("FileToPDF", System.Reflection.BindingFlags.InvokeMethod, null, pdf, new object[] { printFileName, pdfFileName, jobOptions });
    }
    catch (Exception ex)
    {
        // Best-effort step: report the failure instead of silently discarding it.
        System.Diagnostics.Trace.WriteLine("PostScript to PDF conversion failed: " + ex);
    }

    // Stop Distiller so the next call starts from a clean state. Looking the processes up
    // by name avoids parsing Process.ToString(), which throws when the text has no "(name)" part.
    foreach (System.Diagnostics.Process proc in System.Diagnostics.Process.GetProcessesByName("acrodist"))
    {
        try
        {
            proc.Kill();
        }
        catch (Win32Exception ex)
        {
            System.Diagnostics.Trace.WriteLine("Could not stop acrodist: " + ex.Message);
        }
        catch (InvalidOperationException ex)
        {
            // The process had already exited.
            System.Diagnostics.Trace.WriteLine("Could not stop acrodist: " + ex.Message);
        }
        finally
        {
            proc.Dispose();
        }
    }
}

/// <summary>
/// Asks the user for a file and, if one is chosen, passes its path to <c>WordConvert</c>.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button3_Click(object sender, EventArgs e)
{
    DialogResult choice = openFileDialog1.ShowDialog();
    if (choice != DialogResult.OK)
    {
        // Dialog was dismissed without picking a file.
        return;
    }

    WordConvert(openFileDialog1.FileName);
}

//getnextcode
/// <summary>
/// Shows in textBox3 the code that follows the work-cell code typed in textBox2,
/// using the text of textBox1 as the parent code.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button4_Click(object sender, EventArgs e)
{
    string currentCode = textBox2.Text;
    string parentCode = textBox1.Text;
    textBox3.Text = new WorkCell(currentCode, parentCode).GetNextCode();
}

}

/// <summary>
/// Computes the code that follows a work-cell code within its parent.
/// </summary>
/// <remarks>
/// A code is the parent's prefix (the parent code without its trailing '0' padding)
/// followed by a suffix. The suffix is counted with the 35 symbols '1'..'9','A'..'Z';
/// '0' is padding, not a digit.
/// </remarks>
public class WorkCell
{
    // Ordered digit alphabet used for counting; '0' is deliberately absent.
    private static readonly char[] standCode = new char[]
    {
        '1', '2', '3', '4', '5', '6', '7', '8', '9',
        'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
        'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z'
    };

    private string workCellCode;
    private string parentCellCode;

    // Parent code without its trailing zero padding.
    private string commonCode;

    // Part of the work-cell code that follows commonCode; advanced in place by GetNextCode.
    private char[] code;

    /// <summary>
    /// Creates a work cell from its own code and the code of its parent.
    /// </summary>
    /// <param name="mycode">Code of this work cell.</param>
    /// <param name="parentcode">Code of the parent work cell.</param>
    /// <exception cref="ArgumentNullException">Either argument is null.</exception>
    public WorkCell(string mycode, string parentcode)
    {
        if (mycode == null)
        {
            throw new ArgumentNullException("mycode");
        }
        if (parentcode == null)
        {
            throw new ArgumentNullException("parentcode");
        }

        workCellCode = mycode;
        parentCellCode = parentcode;
        SplitCode();
    }

    /// <summary>
    /// Code of this work cell. Setting it restarts counting from the new value.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public string WorkCellCode
    {
        set
        {
            if (value == null)
            {
                throw new ArgumentNullException("value");
            }
            workCellCode = value;
            SplitCode();
        }
        get
        {
            return workCellCode;
        }
    }

    /// <summary>
    /// Code of the parent work cell. Setting it restarts counting with the new prefix.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is null.</exception>
    public string ParentCellCode
    {
        set
        {
            if (value == null)
            {
                throw new ArgumentNullException("value");
            }
            parentCellCode = value;
            SplitCode();
        }
        get
        {
            return parentCellCode;
        }
    }

    /// <summary>
    /// Advances the suffix by one step and returns the resulting full code.
    /// </summary>
    /// <returns>
    /// The parent prefix followed by the advanced suffix, or the literal string "null"
    /// when the work-cell code has no suffix after the parent prefix.
    /// </returns>
    /// <remarks>
    /// The right-most character that is not '0' is incremented, carrying to the left on
    /// overflow. A suffix made only of '0' is returned unchanged, and an overflow of the
    /// left-most position wraps around to '1'. Each call advances the stored suffix, so
    /// repeated calls return successive codes; <see cref="WorkCellCode"/> is not updated.
    /// </remarks>
    public string GetNextCode()
    {
        if (code.Length == 0)
        {
            return "null";
        }

        for (int i = code.Length - 1; i >= 0; i--)
        {
            if (code[i] != '0')
            {
                GetNextChar(i);
                break;
            }
        }

        return commonCode + new string(code);
    }

    // Derives the parent prefix and the suffix from the two stored codes.
    private void SplitCode()
    {
        // Only trailing zeros are padding. Removing zeros elsewhere would make the prefix
        // shorter than the part of the work-cell code it stands for.
        commonCode = parentCellCode.TrimEnd('0');

        // A work-cell code no longer than the prefix has no suffix to count with.
        if (workCellCode.Length > commonCode.Length)
        {
            code = workCellCode.Substring(commonCode.Length).ToCharArray();
        }
        else
        {
            code = new char[0];
        }
    }

    // Increments the suffix character at position j, carrying to the left while positions
    // overflow, and returns the new character at position j.
    private char GetNextChar(int j)
    {
        for (int pos = j; pos >= 0; pos--)
        {
            int index = Array.IndexOf(standCode, code[pos]);

            if (index < 0)
            {
                // '0' padding or a foreign character counts as "below the first digit":
                // it becomes the first digit and absorbs the carry.
                code[pos] = standCode[0];
                break;
            }

            if (index < standCode.Length - 1)
            {
                code[pos] = standCode[index + 1];
                break;
            }

            // Last digit: wrap to the first one and carry into the position on the left.
            code[pos] = standCode[0];
        }

        return code[j];
    }
}
}
See also my other article about converting Office documents to PDF or XPS: http://www.c-sharpcorner.com/UploadFile/d2dcfc/7799/