C# Corner
Tech
News
Videos
Forums
Trainings
Books
Events
More
Interviews
Jobs
Live
Learn
Career
Members
Blogs
Challenges
Certifications
Bounties
Contribute
Article
Blog
Video
Ebook
Interview Question
Collapse
Feed
Dashboard
Wallet
Learn
Achievements
Network
Refer
Rewards
SharpGPT
Premium
Contribute
Article
Blog
Video
Ebook
Interview Question
Register
Login
Convert HTML Tables To DataSet In C#
WhatsApp
Ashish Kumar Jaiswal
Aug 23
2016
12.1
k
0
2
/// <summary>
/// Extracts every &lt;table&gt; element found in an HTML string and converts each one
/// into a <see cref="DataTable"/> inside the returned <see cref="DataSet"/>.
/// </summary>
/// <param name="HTML">The HTML markup to scan. May be null or empty.</param>
/// <returns>
/// A <see cref="DataSet"/> containing one <see cref="DataTable"/> per matched table, in
/// document order. The set is empty when <paramref name="HTML"/> is null, empty, or
/// contains no tables.
/// </returns>
/// <remarks>
/// Parsing is regex based and tag matching is case-insensitive.
/// If the first row of a table contains &lt;th&gt; cells, their inner markup becomes the
/// column names and that row is not emitted as data; otherwise columns are named
/// "Column 1", "Column 2", ... from the first row's &lt;td&gt; count.
/// Data values are the raw inner markup of each &lt;td&gt; cell (no entity decoding).
/// Rows that have more cells than the table has columns grow the table instead of failing.
/// Nested tables are not supported: the non-greedy table pattern stops at the first
/// closing table tag.
/// </remarks>
private DataSet ConvertHTMLTablesToDataSet(string HTML)
{
    DataSet result = new DataSet();

    if (string.IsNullOrEmpty(HTML))
    {
        return result;
    }

    // Singleline lets '.' span line breaks inside a cell; IgnoreCase accepts <td> and <TD> alike.
    const RegexOptions options = RegexOptions.Multiline | RegexOptions.Singleline | RegexOptions.IgnoreCase;

    // The \b after each tag name stops <TH from matching <THEAD>, <TR from matching <TRACK>, etc.
    const string tableExpression = @"<TABLE\b[^>]*>(.*?)</TABLE>";
    const string rowExpression = @"<TR\b[^>]*>(.*?)</TR>";
    const string headerExpression = @"<TH\b[^>]*>(.*?)</TH>";
    const string columnExpression = @"<TD\b[^>]*>(.*?)</TD>";

    foreach (Match tableMatch in Regex.Matches(HTML, tableExpression, options))
    {
        DataTable table = new DataTable();
        MatchCollection rows = Regex.Matches(tableMatch.Groups[1].Value, rowExpression, options);

        // Index of the first row that carries data (1 when row 0 is a header row).
        int firstDataRow = 0;

        if (rows.Count > 0)
        {
            string firstRowMarkup = rows[0].Groups[1].Value;
            MatchCollection headers = Regex.Matches(firstRowMarkup, headerExpression, options);

            if (headers.Count > 0)
            {
                // Header row: name the columns after the header cells and skip the row as data.
                foreach (Match header in headers)
                {
                    AddUniqueColumn(table, header.Groups[1].Value);
                }

                firstDataRow = 1;
            }
            else
            {
                // No headers: size the table from the first row, computed once.
                int cellCount = Regex.Matches(firstRowMarkup, columnExpression, options).Count;
                for (int number = 1; number <= cellCount; number++)
                {
                    AddUniqueColumn(table, "Column " + number);
                }
            }
        }

        for (int rowIndex = firstDataRow; rowIndex < rows.Count; rowIndex++)
        {
            MatchCollection cells = Regex.Matches(rows[rowIndex].Groups[1].Value, columnExpression, options);

            // Ragged rows: add default-named columns rather than indexing past the end.
            while (table.Columns.Count < cells.Count)
            {
                AddUniqueColumn(table, "Column " + (table.Columns.Count + 1));
            }

            DataRow row = table.NewRow();
            for (int cellIndex = 0; cellIndex < cells.Count; cellIndex++)
            {
                row[cellIndex] = cells[cellIndex].Groups[1].Value;
            }

            table.Rows.Add(row);
        }

        result.Tables.Add(table);
    }

    return result;
}

/// <summary>
/// Adds a string column to <paramref name="table"/>, appending " (2)", " (3)", ... to
/// <paramref name="name"/> when a column with that name already exists.
/// </summary>
/// <param name="table">The table that receives the new column.</param>
/// <param name="name">Requested column name; an empty name lets the table assign its default name.</param>
private static void AddUniqueColumn(DataTable table, string name)
{
    if (string.IsNullOrEmpty(name))
    {
        table.Columns.Add();
        return;
    }

    string candidate = name;
    int suffix = 2;
    while (table.Columns.Contains(candidate))
    {
        candidate = name + " (" + suffix + ")";
        suffix++;
    }

    table.Columns.Add(candidate);
}
C#
Convert HTML Tables
DataSet In C#
Up Next
Convert HTML Tables To DataSet In C#