.NET2.0抓取网页全部链接

    技术2025-07-29  12

    后台代码:

    using System;using System.Data;using System.Configuration;using System.Web;using System.Web.Security;using System.Web.UI;using System.Web.UI.WebControls;using System.Web.UI.WebControls.WebParts;using System.Web.UI.HtmlControls;using System.Text.RegularExpressions;using System.Net;using System.IO;using System.Collections;

    /// <summary>
    /// Code-behind for Default.aspx. Downloads the page whose address is typed into
    /// <c>TextBox1</c> and lists every distinct absolute link found in its source
    /// in <c>TextBox2</c>, one link per line.
    /// </summary>
    public partial class _Default : System.Web.UI.Page
    {
        /// <summary>
        /// Matches an absolute http/https URL: scheme, dotted host name and an
        /// optional path/query made of word characters and <c>- . / ? % &amp; =</c>.
        /// Built once and shared because the pattern never changes between requests.
        /// </summary>
        private static readonly Regex LinkPattern = new Regex(
            @"http(s)?://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?",
            RegexOptions.IgnoreCase | RegexOptions.Compiled);

        /// <summary>
        /// Page load handler (wired by name through AutoEventWireup). The page needs
        /// no initialisation on first load or on postback.
        /// </summary>
        /// <param name="sender">The page raising the event.</param>
        /// <param name="e">Unused event data.</param>
        protected void Page_Load(object sender, EventArgs e)
        {
        }

        /// <summary>
        /// Click handler of the extract button: validates the address in
        /// <c>TextBox1</c>, downloads that page and writes its distinct links to
        /// <c>TextBox2</c>.
        /// </summary>
        /// <param name="sender">The button raising the event.</param>
        /// <param name="e">Unused event data.</param>
        /// <exception cref="WebException">The page could not be downloaded.</exception>
        protected void Button1_Click(object sender, EventArgs e)
        {
            TextBox2.Text = "";

            // Example input: http://blog.csdn.net/21aspnet/
            string address = TextBox1.Text.Trim();

            // Reject empty, relative and non-HTTP addresses up front: the download
            // below casts to HttpWebRequest, which only exists for http/https.
            Uri target;
            if (!Uri.TryCreate(address, UriKind.Absolute, out target)
                || (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps))
            {
                TextBox2.Text = "请输入以 http:// 或 https:// 开头的完整网址。";
                return;
            }

            string pageSource = DownloadPage(target);

            // Build the output once instead of re-assigning TextBox2.Text per link.
            System.Text.StringBuilder output = new System.Text.StringBuilder();
            foreach (string link in ExtractLinks(pageSource))
            {
                output.Append(link).Append("\n");
            }

            TextBox2.Text = output.ToString();
        }

        /// <summary>Downloads the response body of <paramref name="address"/> as text.</summary>
        /// <param name="address">Absolute http/https address of the page.</param>
        /// <returns>The complete response body.</returns>
        private static string DownloadPage(Uri address)
        {
            HttpWebRequest request = (HttpWebRequest)WebRequest.Create(address);

            // Dispose response and reader even when reading fails, so the
            // underlying connection is returned to the pool.
            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream()))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Collects the distinct links in <paramref name="html"/>, in order of first
        /// appearance. Links are compared case-sensitively.
        /// </summary>
        /// <param name="html">Page source to scan.</param>
        /// <returns>A list of <see cref="string"/> links without duplicates.</returns>
        private static ArrayList ExtractLinks(string html)
        {
            ArrayList links = new ArrayList();
            Hashtable seen = new Hashtable();

            foreach (Match match in LinkPattern.Matches(html))
            {
                string link = match.Value;

                // Filter duplicates: remember every link already reported.
                if (!seen.ContainsKey(link))
                {
                    seen.Add(link, null);
                    links.Add(link);
                }
            }

            return links;
        }
    }

    前台（Default.aspx）：

    <%@ Page Language="C#" AutoEventWireup="true"  CodeFile="Default.aspx.cs" Inherits="_Default" %>

    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">

    <html xmlns="http://www.w3.org/1999/xhtml" >
    <head runat="server">
        <title>抓取网页所有链接</title>
    </head>
    <body >
        <form id="form1" runat="server">
        <div>
            <%-- Address of the page to scan --%>
            <asp:TextBox ID="TextBox1" runat="server" Width="481px"></asp:TextBox>
            <%-- Posts back to Button1_Click in the code-behind --%>
            <asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="提取" />
            <br />
            <%-- Multi-line output area filled by the code-behind with the links it finds --%>
            <asp:TextBox ID="TextBox2" runat="server" Height="304px" TextMode="MultiLine" Width="524px"></asp:TextBox>
        </div>
        </form>
    </body>
    </html>

     

    最新回复(0)