forked from sin365/AxibugEmuOnline
143 lines
5.5 KiB
C#
143 lines
5.5 KiB
C#
|
using HtmlAgilityPack;
|
|||
|
using System.Text;
|
|||
|
using System.Xml;
|
|||
|
|
|||
|
namespace HtmlTable
|
|||
|
{
|
|||
|
/// <summary>
/// Console tool that scans the "Input" folder next to the executable for .htm/.html
/// pages, extracts the data rows of the game table found in each page and writes all
/// rows as CSV lines to "Out/out.csv".
/// </summary>
internal class Program
{
    /// <summary>Base directory of the running application; Input/Out are resolved against it.</summary>
    static readonly string loc = AppContext.BaseDirectory;

    /// <summary>Name of the folder (under <see cref="loc"/>) that holds the HTML files to process.</summary>
    const string InDir = "Input";

    /// <summary>Name of the folder (under <see cref="loc"/>) that receives the generated CSV.</summary>
    const string OutDir = "Out";

    /// <summary>
    /// Entry point: validates the Input/Out folders, asks for confirmation, converts every
    /// HTML file and saves the combined result.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string inputPath = Path.Combine(loc, InDir);
        string outputPath = Path.Combine(loc, OutDir);

        if (!Directory.Exists(inputPath))
        {
            Console.WriteLine("Input文件不存在");
            Console.ReadLine();
            return;
        }

        if (!Directory.Exists(outputPath))
        {
            Console.WriteLine("Out文件不存在");
            Console.ReadLine();
            return;
        }

        // Code-page encodings such as gb2312 must be registered before they can be looked up.
        Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
        Encoding gb2312 = Encoding.GetEncoding("gb2312");

        // NOTE(review): FileHelper.GetDirFile is defined elsewhere; presumably it returns
        // the full paths of the files in the folder - confirm.
        string[] files = FileHelper.GetDirFile(inputPath);
        Console.WriteLine($"共{files.Length}个文件,是否处理? (y/n)");

        // Console.ReadLine returns null when the input stream is closed; treat that as "no".
        var answer = Console.ReadLine();
        if (!string.Equals(answer?.Trim(), "y", StringComparison.OrdinalIgnoreCase))
        {
            return;
        }

        int index = 0;      // number of HTML files attempted
        int errcount = 0;   // number of HTML files that failed
        List<string> outline = new List<string>();

        foreach (string file in files)
        {
            string fileName = Path.GetFileName(file);
            if (!IsHtmlFile(fileName))
            {
                continue;
            }

            index++;
            Console.WriteLine($">>>>>>>>>>>>>>开始处理 第{index}个文件 {fileName}<<<<<<<<<<<<<<<<<<<");

            try
            {
                outline.AddRange(GetToData(File.ReadAllText(file, gb2312)));
                Console.WriteLine($">>>>>>>>>>>>>>成功处理 第{index}个");
            }
            catch (Exception ex)
            {
                // Best-effort batch: one unreadable or malformed file must not abort the
                // others, so record the failure and carry on.
                errcount++;
                Console.WriteLine($">>>>>>>>>>>>>>处理失败 第{index}个 {fileName}: {ex.Message}");
            }
        }

        string newfileName = "out.csv";
        string outstring = Path.Combine(outputPath, newfileName);
        FileHelper.SaveFile(outstring, outline.ToArray());

        // Report the files actually attempted, not every file found in the folder.
        Console.WriteLine($"已处理{index}个文件,其中{errcount}个失败");

        Console.ReadLine();
    }

    /// <summary>
    /// Parses an HTML page and converts each data row of the first table having
    /// width='100%' and border='1' into one CSV line:
    /// name, game link, image link, type, description.
    /// </summary>
    /// <param name="html">HTML source of the page.</param>
    /// <returns>
    /// One CSV line per usable row; empty when the table or its rows are not found.
    /// Rows lacking a required cell or link are skipped with a console message.
    /// </returns>
    static List<string> GetToData(string html)
    {
        List<string> result = new List<string>();
        HtmlDocument doc = new HtmlDocument();
        doc.LoadHtml(html);

        HtmlNode table = doc.DocumentNode.SelectSingleNode("//table[@width='100%' and @border='1']");
        if (table == null)
        {
            Console.WriteLine("未找到指定的table元素");
            return result;
        }

        // SelectNodes yields null, not an empty collection, when nothing matches.
        HtmlNodeCollection rows = table.SelectNodes("tr");
        if (rows == null)
        {
            return result;
        }

        // Start at 1: the first row is the header row.
        for (int i = 1; i < rows.Count; i++)
        {
            HtmlNode row = rows[i];

            // Rows without any <td> carry no data.
            if (row.SelectSingleNode("td") == null)
            {
                continue;
            }

            HtmlNode nameCell = GetCell(row, 1);
            HtmlNode imageCell = GetCell(row, 2);
            HtmlNode typeCell = GetCell(row, 3);
            HtmlNode descriptionCell = GetCell(row, 4);

            HtmlNode gameLink = nameCell?.SelectSingleNode("a");
            HtmlNode imageLink = imageCell?.SelectSingleNode("a");

            if (gameLink == null || imageLink == null || typeCell == null || descriptionCell == null)
            {
                // Incomplete rows are skipped as before, but no longer silently.
                Console.WriteLine($"第{i + 1}行缺少必要的单元格或链接,已跳过");
                continue;
            }

            string gameName = nameCell.InnerText.Trim();
            string gameUrl = gameLink.GetAttributeValue("href", null);
            string imgUrl = imageLink.GetAttributeValue("href", null);
            string gameType = typeCell.InnerText.Trim();
            string description = descriptionCell.InnerText.Trim();

            string outline = string.Join(",",
                CsvField(gameName),
                CsvField(gameUrl),
                CsvField(imgUrl),
                CsvField(gameType),
                CsvField(description));

            // Echo the extracted line.
            Console.WriteLine(outline);

            result.Add(outline);
        }

        return result;
    }

    /// <summary>Returns true when the file name has a .htm or .html extension (case-insensitive).</summary>
    static bool IsHtmlFile(string fileName)
    {
        string extension = Path.GetExtension(fileName) ?? string.Empty;
        return extension.Equals(".htm", StringComparison.OrdinalIgnoreCase)
            || extension.Equals(".html", StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Returns the content node of the n-th (1-based) cell of a row: the cell's child div
    /// when present, otherwise the cell itself; null when the row has no such cell.
    /// </summary>
    static HtmlNode GetCell(HtmlNode row, int position)
    {
        return row.SelectSingleNode($"td[{position}]/div") ?? row.SelectSingleNode($"td[{position}]");
    }

    /// <summary>Wraps a value in double quotes for CSV, doubling any embedded quotes; null becomes an empty field.</summary>
    static string CsvField(string value)
    {
        return "\"" + (value ?? string.Empty).Replace("\"", "\"\"") + "\"";
    }
}
|
|||
|
}
|