using System;
using System.Collections.Generic;
using System.Net;
using System.Web;
/// <summary>
/// Head/Left/Right/Tail text wrapper: downloads a document and extracts repeating
/// groups of values that are enclosed by fixed delimiter strings.
/// </summary>
class HLRT_Wrapper
{
    public HLRT_Wrapper()
    {
    }

    /// <summary>
    /// Downloads <paramref name="address"/>, HTML-decodes the whole document and extracts
    /// the values described by <paramref name="elemente"/> (see <see cref="parseSource"/>).
    /// </summary>
    /// <param name="address">Address handed to <see cref="WebClient.DownloadString(string)"/>.</param>
    /// <param name="head">Text after which extraction starts.</param>
    /// <param name="tail">Text that ends the extraction region.</param>
    /// <param name="elemente">Delimiter descriptions; one result column is produced per element.</param>
    /// <returns>One list per element, each holding the values found for that element in document order.</returns>
    /// <exception cref="ArgumentNullException">A delimiter argument is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="elemente"/> contains a null entry.</exception>
    public List<List<String>> parseWebsite(string address, string head, string tail, params Element[] elemente)
    {
        // Fail before touching the network when the delimiters are unusable.
        checkArguments(head, tail, elemente);

        string source;
        // The client is created per call so that it is always disposed, even on failure.
        using (WebClient webClient = new WebClient())
        {
            // Entities are decoded first, so all delimiters must be given in decoded form.
            source = HttpUtility.HtmlDecode(webClient.DownloadString(address));
        }
        return parseSource(source, head, tail, elemente);
    }

    /// <summary>
    /// Extracts repeating groups of values from already loaded text.
    /// </summary>
    /// <remarks>
    /// Scanning starts one character after the first occurrence of <paramref name="head"/>
    /// (or at the start of the text when the head is not found) and stops at the first
    /// <paramref name="tail"/> that follows it. When no tail is found the result is empty.
    /// Elements are matched in the given order, over and over, each search continuing
    /// behind the previous right delimiter. A group is only added to the result when all
    /// of its elements were found, so every returned column has the same length.
    /// All delimiter searches are ordinal.
    /// </remarks>
    /// <param name="source">Text to scan.</param>
    /// <param name="head">Text after which extraction starts.</param>
    /// <param name="tail">Text that ends the extraction region.</param>
    /// <param name="elemente">Delimiter descriptions; one result column is produced per element.</param>
    /// <returns>
    /// One list per element; the list is empty (no columns at all) when no complete group was found.
    /// </returns>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ArgumentException"><paramref name="elemente"/> contains a null entry.</exception>
    public List<List<String>> parseSource(string source, string head, string tail, params Element[] elemente)
    {
        if (source == null)
        {
            throw new ArgumentNullException("source");
        }
        checkArguments(head, tail, elemente);

        List<List<String>> result = new List<List<String>>();

        // Without elements the scan position would never advance and the loop below would spin forever.
        if (elemente.Length == 0)
        {
            return result;
        }

        // -1 when the head is missing, which makes the first search start at index 0.
        int position = source.IndexOf(head, StringComparison.Ordinal);

        // The tail is looked up once, and only behind the head: an earlier occurrence of the
        // same text must not close the region before it has even started.
        int tailIndex = source.IndexOf(tail, position + 1, StringComparison.Ordinal);
        if (tailIndex == -1)
        {
            return result;
        }

        // Values of the group that is currently being read; copied to the result once complete.
        string[] row = new string[elemente.Length];

        while (true)
        {
            for (int i = 0; i < elemente.Length; i++)
            {
                Element element = elemente[i];

                int leftIndex = source.IndexOf(element.left, position + 1, StringComparison.Ordinal);
                if (leftIndex == -1)
                {
                    return result;
                }

                int rightIndex = source.IndexOf(element.right, leftIndex + 1, StringComparison.Ordinal);
                if (rightIndex == -1 || rightIndex > tailIndex)
                {
                    return result;
                }

                int start = leftIndex + element.leftDistance;
                int length = rightIndex - element.rightDistance - start;
                row[i] = slice(source, start, length);

                position = rightIndex;
            }

            // Columns are created lazily so that "nothing found" still yields an empty list.
            while (result.Count < elemente.Length)
            {
                result.Add(new List<String>());
            }
            for (int i = 0; i < row.Length; i++)
            {
                result[i].Add(row[i]);
            }
        }
    }

    /// <summary>Rejects null delimiters and null element entries.</summary>
    private static void checkArguments(string head, string tail, Element[] elemente)
    {
        if (head == null)
        {
            throw new ArgumentNullException("head");
        }
        if (tail == null)
        {
            throw new ArgumentNullException("tail");
        }
        if (elemente == null)
        {
            throw new ArgumentNullException("elemente");
        }
        foreach (Element element in elemente)
        {
            if (element == null)
            {
                throw new ArgumentException("Element entries must not be null.", "elemente");
            }
        }
    }

    /// <summary>
    /// Returns the requested part of <paramref name="source"/>, clamped to the bounds of the text.
    /// A span that is empty or inverted (the trim distances are larger than the delimited text)
    /// yields an empty string instead of an exception.
    /// </summary>
    private static string slice(string source, int start, int length)
    {
        if (start < 0)
        {
            length += start;
            start = 0;
        }
        if (start >= source.Length || length <= 0)
        {
            return String.Empty;
        }
        if (length > source.Length - start)
        {
            length = source.Length - start;
        }
        return source.Substring(start, length);
    }

    /// <summary>
    /// Describes how a single value is delimited in the text.
    /// </summary>
    public class Element
    {
        /// <summary>Text that is searched for in front of the value.</summary>
        public string left { get; set; }

        /// <summary>Offset from the start of the <see cref="left"/> match to the first character of the value.</summary>
        public int leftDistance { get; set; }

        /// <summary>Text that is searched for behind the <see cref="left"/> match.</summary>
        public string right { get; set; }

        /// <summary>Number of characters directly in front of the <see cref="right"/> match that are cut off the value.</summary>
        public int rightDistance { get; set; }

        public Element(string left, int leftDistance, string right, int rightDistance)
        {
            this.left = left;
            this.leftDistance = leftDistance;
            this.right = right;
            this.rightDistance = rightDistance;
        }
    }
}
/// <summary>
/// Example: downloads http://dotnet-snippets.de/ and prints the language, author, link
/// and title values found after the "col3" header cell, one tab-separated line per entry.
/// Prints nothing when no entry could be extracted.
/// </summary>
public void parseDotnetSnippets()
{
    HLRT_Wrapper wrapper = new HLRT_Wrapper();
    HLRT_Wrapper.Element sprache = new HLRT_Wrapper.Element("col1\">", 6, "<", 0);
    HLRT_Wrapper.Element autor = new HLRT_Wrapper.Element("col2\">", 6, "<", 0);
    HLRT_Wrapper.Element url = new HLRT_Wrapper.Element("<a href=", 9, "\">", 0);
    HLRT_Wrapper.Element titel = new HLRT_Wrapper.Element(">", 1, "</a>", 0);
    List<List<String>> liste = wrapper.parseWebsite("http://dotnet-snippets.de/", "col3\">Titel", "</table>", sprache, autor, url, titel);

    // The wrapper returns no columns at all when nothing matched; liste[0] would throw then.
    if (liste.Count == 0)
    {
        return;
    }

    // Only print rows that exist in every column, so a shorter column cannot cause an
    // out-of-range access.
    int rowCount = liste[0].Count;
    foreach (List<String> column in liste)
    {
        rowCount = Math.Min(rowCount, column.Count);
    }

    for (int j = 0; j < rowCount; j++)
    {
        for (int i = 0; i < liste.Count; i++)
        {
            Console.Write(liste[i][j] + "\t");
        }
        Console.WriteLine();
    }
}
/// <summary>
/// Example: downloads http://www.wdr2.de/ and prints the artist and title values found
/// between the "colored" and "class=&quot;live" markers, one tab-separated line per entry.
/// Prints nothing when no entry could be extracted.
/// </summary>
public void parseWDR2()
{
    HLRT_Wrapper wrapper = new HLRT_Wrapper();
    HLRT_Wrapper.Element interpret = new HLRT_Wrapper.Element("strong", 7, "<", 1);
    HLRT_Wrapper.Element titel = new HLRT_Wrapper.Element("/strong", 8, "<", 0);
    List<List<String>> liste = wrapper.parseWebsite("http://www.wdr2.de/", "colored", "class=\"live", interpret, titel);

    // The wrapper returns no columns at all when nothing matched; liste[0] would throw then.
    if (liste.Count == 0)
    {
        return;
    }

    // Only print rows that exist in every column, so a shorter column cannot cause an
    // out-of-range access.
    int rowCount = liste[0].Count;
    foreach (List<String> column in liste)
    {
        rowCount = Math.Min(rowCount, column.Count);
    }

    for (int j = 0; j < rowCount; j++)
    {
        for (int i = 0; i < liste.Count; i++)
        {
            Console.Write(liste[i][j] + "\t");
        }
        Console.WriteLine();
    }
}