();
HtmlDocument doc = new HtmlDocument();
doc.LoadHtml(html);
// 假设table的XPath已经给出,但这里我们直接使用根table(因为示例中只有一个)
HtmlNode table = doc.DocumentNode.SelectSingleNode("//table[@width='100%' and @border='1']");
if (table != null)
{
var all = table.SelectNodes("tr");
int Idx = 0;
// 遍历除了标题行之外的所有行
foreach (HtmlNode row in all) // 跳过标题行
{
Idx++;
if (Idx == 1)
continue;
// 提取游戏名称和游戏链接
HtmlNode gameNameNode = row.SelectSingleNode("td");
if (gameNameNode != null)
{
try
{
HtmlNode gameNode1 = row.SelectSingleNode("td[1]/div");
if(gameNode1 == null) gameNode1 = row.SelectSingleNode("td[1]");
string gameName = gameNode1.InnerText.Trim();
string gameUrl = gameNode1.SelectSingleNode("a").GetAttributeValue("href", null);
HtmlNode gameNode2 = row.SelectSingleNode("td[2]/div");
if (gameNode2 == null) gameNode2 = row.SelectSingleNode("td[2]");
string imgUrl = gameNode2.SelectSingleNode("a").GetAttributeValue("href", null);
HtmlNode gameNode3 = row.SelectSingleNode("td[3]/div");
if (gameNode3 == null) gameNode3 = row.SelectSingleNode("td[3]");
string gameType = gameNode3.InnerText.Trim();
HtmlNode gameNode4 = row.SelectSingleNode("td[4]/div");
if (gameNode4 == null) gameNode4 = row.SelectSingleNode("td[4]");
string description = gameNode4.InnerText.Trim();
//// 假设图片URL、游戏类型和说明分别在第二个、第三个和第四个中
//HtmlNode imgNode = row.SelectSingleNode("td:nth-child(2) img");
//string imgUrl = imgNode?.GetAttributeValue("src", null);
//HtmlNode gameTypeNode = row.SelectSingleNode("td:nth-child(3)");
//string gameType = gameTypeNode?.InnerText.Trim();
//HtmlNode descriptionNode = row.SelectSingleNode("td:nth-child(4)");
//string description = descriptionNode?.InnerText.Trim();
string outline = $"\"{gameName}\",\"{gameUrl}\",\"{imgUrl}\",\"{gameType}\",\"{description}\"";
// 输出信息
Console.WriteLine(outline);
result.Add(outline);
}
catch
{
}
}
}
}
else
{
Console.WriteLine("未找到指定的table元素");
}
return result;
}
}
}
|