Reader Level:
ARTICLE

Convert HTML to Word Then Word to PDF With C#

Posted by Lizzy Landy Articles | C# Language July 14, 2011
This method does not convert HTML to PDF directly, but instead first converts HTML to Word then converts Word to PDF.
  • 0
  • 0
  • 22795

After working out a method for converting Office documents to PDF, I recently researched ways to convert an HTML file to PDF and collected a lot of material. I now have one method that I want to share with you.

This method does not convert HTML to PDF directly, but instead first converts HTML to Word then converts Word to PDF.

Using the Code:
using System;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.Text;
using System.Windows.Forms;
using Word = Microsoft.Office.Interop.Word;
using oWord = Microsoft.Office.Interop.Word;
using System.Reflection;

using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using Microsoft.Office.Core;
using System.Text.RegularExpressions;

namespace WindowsApplication2
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form and runs <c>InitializeComponent</c>, which is defined in
/// another part of this partial class.
/// </summary>
public Form1()
{
    this.InitializeComponent();
}
/// <summary>
/// Starts a visible Word instance, creates a new document and fills it with
/// sample content: three paragraphs, a 3 x 5 table, a 5 x 2 table, filler
/// lines up to 7 inches from the top of the page, a page break, an MSGraph
/// chart and a closing line. The form is then closed.
/// </summary>
/// <remarks>
/// Every insertion point is obtained from the predefined "\endofdoc" bookmark,
/// so the statements depend on being executed in this order. This method
/// neither saves the document nor calls Quit on the Word application.
/// </remarks>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button1_Click(object sender, EventArgs e)
{
// Placeholder passed by reference for every optional COM argument.
object oMissing = System.Reflection.Missing.Value;
object oEndOfDoc = "\\endofdoc"; /* \endofdoc is a predefined bookmark */

//Start Word and create a new document.

Word._Application oWord;
Word._Document oDoc;
oWord = new Word.Application();
oWord.Visible = true;
oDoc = oWord.Documents.Add(ref oMissing, ref oMissing,
ref oMissing, ref oMissing);

//Insert a paragraph at the beginning of the document.
Word.Paragraph oPara1;
oPara1 = oDoc.Content.Paragraphs.Add(ref oMissing);
oPara1.Range.Text = "Heading 1";
oPara1.Range.Font.Bold = 1;
oPara1.Format.SpaceAfter = 24; //24 pt spacing after paragraph.
oPara1.Range.InsertParagraphAfter();
//Insert a paragraph at the end of the document.
Word.Paragraph oPara2;
object oRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oPara2 = oDoc.Content.Paragraphs.Add(ref oRng);
oPara2.Range.Text = "Heading 2";
oPara2.Format.SpaceAfter = 6;
oPara2.Range.InsertParagraphAfter();

//Insert another paragraph.
Word.Paragraph oPara3;
oRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oPara3 = oDoc.Content.Paragraphs.Add(ref oRng);
oPara3.Range.Text = "This is a sentence of normal text. Now here is a table:";
oPara3.Range.Font.Bold = 0;
oPara3.Format.SpaceAfter = 24;
oPara3.Range.InsertParagraphAfter();

//Insert a 3 x 5 table, fill it with data, and make the first row bold and italic.
Word.Table oTable;
Word.Range wrdRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oTable = oDoc.Tables.Add(wrdRng, 3, 5, ref oMissing, ref oMissing);
oTable.Range.ParagraphFormat.SpaceAfter = 6;
int r, c;
string strText;
// Cell indices are 1-based; each cell gets the text "r<row>c<column>".
for (r = 1; r <= 3; r++)
for (c = 1; c <= 5; c++)
{
strText = "r" + r + "c" + c;
oTable.Cell(r, c).Range.Text = strText;
}
oTable.Rows[1].Range.Font.Bold = 1;
oTable.Rows[1].Range.Font.Italic = 1;

//Add some text after the table.
Word.Paragraph oPara4;
oRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oPara4 = oDoc.Content.Paragraphs.Add(ref oRng);
oPara4.Range.InsertParagraphBefore();
oPara4.Range.Text = "And here's another table:";
oPara4.Format.SpaceAfter = 24;
oPara4.Range.InsertParagraphAfter();

//Insert a 5 x 2 table, fill it with data, and change the column widths.
wrdRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oTable = oDoc.Tables.Add(wrdRng, 5, 2, ref oMissing, ref oMissing);
oTable.Range.ParagraphFormat.SpaceAfter = 6;
for (r = 1; r <= 5; r++)
for (c = 1; c <= 2; c++)
{
strText = "r" + r + "c" + c;
oTable.Cell(r, c).Range.Text = strText;
}
oTable.Columns[1].Width = oWord.InchesToPoints(2); //Change width of columns 1 & 2
oTable.Columns[2].Width = oWord.InchesToPoints(3);

//Keep inserting text. When you get to 7 inches from top of the document, insert a hard page break.
object oPos;
double dPos = oWord.InchesToPoints(7);
oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range.InsertParagraphAfter();
// The body runs at least once and repeats while the reported vertical
// position of the range is still less than or equal to dPos.
do
{
wrdRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
wrdRng.ParagraphFormat.SpaceAfter = 6;
wrdRng.InsertAfter("A line of text");
wrdRng.InsertParagraphAfter();
oPos = wrdRng.get_Information
(Word.WdInformation.wdVerticalPositionRelativeToPage);
}
while (dPos >= Convert.ToDouble(oPos));
object oCollapseEnd = Word.WdCollapseDirection.wdCollapseEnd;
object oPageBreak = Word.WdBreakType.wdPageBreak;
// Collapse to the end of the range, insert the break there, then collapse
// again before appending the text that follows the break.
wrdRng.Collapse(ref oCollapseEnd);
wrdRng.InsertBreak(ref oPageBreak);
wrdRng.Collapse(ref oCollapseEnd);
wrdRng.InsertAfter("We're now on page 2. Here's my chart:");
wrdRng.InsertParagraphAfter();

//Insert a chart.
Word.InlineShape oShape;
object oClassType = "MSGraph.Chart.8";
wrdRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
oShape = wrdRng.InlineShapes.AddOLEObject(ref oClassType, ref oMissing,
ref oMissing, ref oMissing, ref oMissing,
ref oMissing, ref oMissing, ref oMissing);

//Demonstrate use of late bound oChart and oChartApp objects to manipulate the chart object with MSGraph.
object oChart;
object oChartApp;
oChart = oShape.OLEFormat.Object;
oChartApp = oChart.GetType().InvokeMember("Application",
BindingFlags.GetProperty, null, oChart, null);

//Change the chart type to Line.
object[] Parameters = new Object[1];
Parameters[0] = 4; //xlLine = 4
oChart.GetType().InvokeMember("ChartType", BindingFlags.SetProperty,
null, oChart, Parameters);

//Update the chart image and quit MSGraph.
oChartApp.GetType().InvokeMember("Update",
BindingFlags.InvokeMethod, null, oChartApp, null);
oChartApp.GetType().InvokeMember("Quit",
BindingFlags.InvokeMethod, null, oChartApp, null);
//... If desired, you can proceed from here using the Microsoft Graph Object model on the oChart and oChartApp objects to make additional changes to the chart.

//Set the width and height of the chart.

oShape.Width = oWord.InchesToPoints(6.25f);
oShape.Height = oWord.InchesToPoints(3.57f);

//Add text after the chart.

wrdRng = oDoc.Bookmarks.get_Item(ref oEndOfDoc).Range;
wrdRng.InsertParagraphAfter();
wrdRng.InsertAfter("THE END.");

//Close this form.
this.Close();
}

/// <summary>
/// Asks the user for a file, opens it in Word and saves it in Word document
/// format to d:\Reports\aaa.doc.
/// </summary>
/// <remarks>
/// Word is always asked to quit, even when opening or saving fails, so that a
/// failed conversion does not leave a WINWORD process behind.
/// </remarks>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button2_Click(object sender, EventArgs e)
{
    // Nothing to convert when the user cancels the dialog.
    if (openFileDialog1.ShowDialog() != DialogResult.OK)
    {
        return;
    }

    object fileName = openFileDialog1.FileName;

    // Initialize Word.
    Word.ApplicationClass word = new Word.ApplicationClass();
    Type wordType = word.GetType();
    try
    {
        // Open the file. The call is late bound so that only the leading
        // arguments have to be supplied; the two false values follow the
        // file name positionally.
        Word.Documents docs = word.Documents;
        Type docsType = docs.GetType();
        Word.Document doc = (Word.Document)docsType.InvokeMember("Open",
            System.Reflection.BindingFlags.InvokeMethod, null, docs,
            new object[] { fileName, false, false });

        // Convert and save.
        // Other WdSaveFormat values that can be used here:
        //   wdFormatHTML, wdFormatDocument, wdFormatDOSText,
        //   wdFormatDOSTextLineBreaks, wdFormatEncodedText, wdFormatRTF,
        //   wdFormatTemplate, wdFormatText, wdFormatTextLineBreaks,
        //   wdFormatUnicodeText
        Type docType = doc.GetType();
        object saveFileName = "d:\\Reports\\aaa.doc";
        docType.InvokeMember("SaveAs", System.Reflection.BindingFlags.InvokeMethod,
            null, doc, new object[] { saveFileName, Word.WdSaveFormat.wdFormatDocument });
    }
    finally
    {
        // Quit Word whether or not the conversion succeeded.
        wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod,
            null, word, null);
    }
}

/// <summary>
/// Converts the document at <paramref name="s"/> to PDF in two steps: Word
/// prints it to the PostScript file c:\aaa.ps, then Acrobat Distiller turns
/// that file into c:\aaa.pdf.
/// </summary>
/// <remarks>
/// Failures while opening or printing the document propagate to the caller,
/// but Word is always asked to quit first. Failures in the Distiller step and
/// while stopping Distiller are best effort: they are written to the trace
/// listeners and do not propagate.
/// </remarks>
/// <param name="s">Path of the document to open in Word.</param>
private void WordConvert(string s)
{
    // Word writes this file and Distiller reads it, so both steps must use
    // the same path.
    const string PostScriptPath = @"c:\aaa.ps";
    const string PdfPath = @"c:\aaa.pdf";

    oWord.ApplicationClass word = new Microsoft.Office.Interop.Word.ApplicationClass();
    Type wordType = word.GetType();
    try
    {
        // Open the Word document. Script equivalent:
        //   var word = new ActiveXObject("Word.Application");
        //   var doc = word.Documents.Open(docfile);
        oWord.Documents docs = word.Documents;
        Type docsType = docs.GetType();
        object objDocName = s;
        oWord.Document doc = (oWord.Document)docsType.InvokeMember("Open",
            System.Reflection.BindingFlags.InvokeMethod, null, docs,
            new object[] { objDocName, true, true });

        // Print to the PostScript file. PrintOut is invoked late bound so that
        // only its first four arguments have to be supplied instead of the
        // whole parameter list. Script equivalent:
        //   doc.PrintOut(false, false, 0, psfile);
        Type docType = doc.GetType();
        object printFileName = PostScriptPath;
        docType.InvokeMember("PrintOut", System.Reflection.BindingFlags.InvokeMethod, null, doc,
            new object[] { false, false, oWord.WdPrintOutRange.wdPrintAllDocument, printFileName });
    }
    finally
    {
        // Quit Word even when opening or printing failed. Script equivalent:
        //   word.Quit();
        wordType.InvokeMember("Quit", System.Reflection.BindingFlags.InvokeMethod, null, word, null);
    }

    // Convert the PostScript file to PDF with Acrobat Distiller. Script equivalent:
    //   PDF.FileToPDF(psfile, pdffile, "");
    try
    {
        ACRODISTXLib.PdfDistillerClass pdf = new ACRODISTXLib.PdfDistillerClass();
        Type pdfType = pdf.GetType();
        pdfType.InvokeMember("FileToPDF", System.Reflection.BindingFlags.InvokeMethod, null, pdf,
            new object[] { PostScriptPath, PdfPath, "" });
    }
    catch (Exception ex)
    {
        // Best effort, as before, but the failure is now recorded instead of
        // being silently discarded.
        System.Diagnostics.Trace.TraceError("PostScript to PDF conversion failed: " + ex);
    }

    // Stop acrodist.exe so that the method can be called several times in a
    // row. Processes are looked up by name rather than by parsing
    // Process.ToString(), which has no "(name)" part for some processes.
    foreach (System.Diagnostics.Process proc in System.Diagnostics.Process.GetProcessesByName("acrodist"))
    {
        try
        {
            proc.Kill();
        }
        catch (System.ComponentModel.Win32Exception ex)
        {
            System.Diagnostics.Trace.TraceWarning("Could not stop acrodist: " + ex.Message);
        }
        catch (InvalidOperationException ex)
        {
            // The process exited between the lookup and the Kill call.
            System.Diagnostics.Trace.TraceWarning("Could not stop acrodist: " + ex.Message);
        }
        catch (NotSupportedException ex)
        {
            System.Diagnostics.Trace.TraceWarning("Could not stop acrodist: " + ex.Message);
        }
        finally
        {
            proc.Dispose();
        }
    }
}

/// <summary>
/// Asks the user for a file and passes the chosen path to WordConvert.
/// Does nothing when the dialog is not confirmed.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button3_Click(object sender, EventArgs e)
{
    DialogResult choice = openFileDialog1.ShowDialog();
    if (choice != DialogResult.OK)
    {
        return;
    }

    WordConvert(openFileDialog1.FileName);
}

/// <summary>
/// Shows in textBox3 the code that follows the one entered in textBox2,
/// using the text of textBox1 as the parent code.
/// </summary>
/// <param name="sender">Event source; not used.</param>
/// <param name="e">Event data; not used.</param>
private void button4_Click(object sender, EventArgs e)
{
    textBox3.Text = new WorkCell(textBox2.Text, textBox1.Text).GetNextCode();
}

}

/// <summary>
/// Computes the code that follows a given work-cell code.
/// </summary>
/// <remarks>
/// The parent code with every '0' removed forms a fixed prefix. The rest of
/// the work-cell code is treated as a number whose digits are the characters
/// of <see cref="StandCode"/>; '0' is not one of those digits and trailing
/// '0' characters are skipped when incrementing.
/// </remarks>
public class WorkCell
{
    // Digits in ascending order: 1-9 followed by A-Z.
    private const string StandCode = "123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    private string workCellCode;
    private string parentCellCode;

    // Parent code with all '0' characters removed; prefix of every result.
    private string commonCode;

    // Part of the work-cell code after the prefix; GetNextCode changes it in place.
    private char[] code;

    /// <summary>
    /// Splits <paramref name="mycode"/> into the prefix derived from
    /// <paramref name="parentcode"/> and the part that will be incremented.
    /// </summary>
    /// <param name="mycode">The current work-cell code.</param>
    /// <param name="parentcode">The code of the parent cell.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="mycode"/> is shorter than the prefix derived from
    /// <paramref name="parentcode"/>.
    /// </exception>
    public WorkCell(string mycode, string parentcode)
    {
        if (mycode == null)
        {
            throw new ArgumentNullException("mycode");
        }

        if (parentcode == null)
        {
            throw new ArgumentNullException("parentcode");
        }

        workCellCode = mycode;
        parentCellCode = parentcode;

        commonCode = Regex.Replace(parentCellCode, @"0+", "");

        code = workCellCode.Substring(commonCode.Length).ToCharArray();
    }

    /// <summary>
    /// The work-cell code. Setting it only stores the value; the state used
    /// by <see cref="GetNextCode"/> is derived once, in the constructor.
    /// </summary>
    public string WorkCellCode
    {
        set
        {
            workCellCode = value;
        }
        get
        {
            return workCellCode;
        }
    }

    /// <summary>
    /// The parent-cell code. Setting it only stores the value; the state used
    /// by <see cref="GetNextCode"/> is derived once, in the constructor.
    /// </summary>
    public string ParentCellCode
    {
        set
        {
            parentCellCode = value;
        }
        get
        {
            return parentCellCode;
        }
    }

    /// <summary>
    /// Increments the rightmost character that is not '0' and returns the
    /// prefix followed by the updated characters.
    /// </summary>
    /// <remarks>
    /// The increment is kept, so calling the method again advances one step
    /// further.
    /// </remarks>
    /// <returns>
    /// The next code, or the literal string "null" when the work-cell code
    /// has no characters after the prefix.
    /// </returns>
    public string GetNextCode()
    {
        if (code.Length == 0)
        {
            return "null";
        }

        for (int i = code.Length - 1; i >= 0; i--)
        {
            if (code[i] != '0')
            {
                GetNextChar(i);
                break;
            }
        }

        return commonCode + new string(code);
    }

    /// <summary>
    /// Replaces the character at position <paramref name="j"/> with its
    /// successor in <see cref="StandCode"/>.
    /// </summary>
    /// <remarks>
    /// The last digit, and any character that is not a digit of
    /// <see cref="StandCode"/>, wraps to the first digit and carries into the
    /// position to the left. A carry out of position 0 is dropped.
    /// </remarks>
    /// <param name="j">Index into the incremented part of the code.</param>
    /// <returns>The character now stored at position <paramref name="j"/>.</returns>
    private char GetNextChar(int j)
    {
        int index = StandCode.IndexOf(code[j]);

        if (index < 0 || index == StandCode.Length - 1)
        {
            code[j] = StandCode[0];
            if (j > 0)
            {
                GetNextChar(j - 1);
            }
        }
        else
        {
            code[j] = StandCode[index + 1];
        }

        return code[j];
    }
}
}
See also my other article about converting Office documents to PDF or XPS: http://www.c-sharpcorner.com/UploadFile/d2dcfc/7799/

Article Extensions
Contents added by tayfun iskender on Jun 29, 2013
thank you for 
Contents added by Swati Kumari on Mar 19, 2013
Contents added by Swati Kumari on Mar 19, 2013
fgh
Contents added by Swati Kumari on Mar 19, 2013
Contents added by m t on Mar 03, 2013
Contents added by m t on Mar 03, 2013
Contents added by m t on Mar 03, 2013
COMMENT USING

Trending up