python 截取网页内容

    技术2022-05-20  49

    程序备份:

    用 Python 截取网页的内容。以下代码中 times 部分的正则表达式还存在一些问题,需要增加多选结构(`|`),以匹配更复杂的情况。

     

    # This file is for obtaining data from the website "http://bj.58.com/diannao/"
# -*- coding: utf-8 -*-
# file: re_html.py
"""Fetch a web page and list the price/product/place/time fields found in it.

A small Tkinter window asks for a URL, downloads the page and applies four
regular expressions to the raw HTML.  The number of matches of each pattern
is shown first, followed by one line per record.

The module uses the Python 2 names ``Tkinter`` and ``urllib.urlopen``.
"""
import Tkinter  # GUI toolkit
import urllib  # URL access library
import re  # regular expression library

from pyExcelerator import *  # Excel file read/write library

# Each pattern captures the text between a fixed opening tag (look-behind)
# and its closing tag (look-ahead).
P_PRICES = re.compile(
    r"""(?<=<td width="60" class="price">)[^<]*(?=</td>)""")
P_PRODUCTS = re.compile(
    r"""(?<=" target="_blank" class="t">)[^<]*(?=</a>)""")
P_PLACES = re.compile(
    r"""(?<=/diannao/' class='u'>)[^<]*(?=</a>)""")
# NOTE: according to the author this pattern does not cover every variant of
# the "time" cell yet; it needs alternation to match the more complex cases.
P_TIMES = re.compile(
    r"""(?<=<td class="pd" width="70">)[^<]*(?=</td>)""")

FIELD_SEPARATOR = '   '


def _extract_fields(data):
    """Return the (prices, products, places, times) lists matched in *data*."""
    return (P_PRICES.findall(data),
            P_PRODUCTS.findall(data),
            P_PLACES.findall(data),
            P_TIMES.findall(data))


class Window:
    """Main window: a URL entry, a fetch button and a text area for results."""

    def __init__(self, root):
        """Create and place the widgets inside *root*."""
        self.root = root
        # Create the widgets.
        self.label = Tkinter.Label(root, text='输入URL:')
        self.label.place(x=5, y=15)
        self.entryUrl = Tkinter.Entry(root, width=30)
        self.entryUrl.place(x=65, y=15)
        self.get = Tkinter.Button(root, text='获取数据', command=self.Get)
        self.get.place(x=280, y=15)
        self.edit = Tkinter.Text(root, width=470, height=600)
        self.edit.place(y=50)

    def Get(self):
        """Download the entered URL and append the extracted records.

        The four match counts are written first (one per line) so that a
        mismatch between the patterns is easy to spot, then one line per
        record with the fields separated by three spaces.
        """
        url = self.entryUrl.get().strip()  # read the URL
        if not url:
            # Nothing to fetch; avoid handing an empty string to urlopen.
            return

        page = urllib.urlopen(url)  # open the URL
        try:
            data = page.read()  # read the page content
        finally:
            # Close the connection even when reading fails.
            page.close()

        prices, products, places, times = _extract_fields(data)

        for matches in (prices, products, places, times):
            self.edit.insert(Tkinter.END, str(len(matches)) + '\n')

        # zip() stops at the shortest list, so a pattern that matched fewer
        # cells can no longer cause an IndexError.  The first record (index 0)
        # is included; the earlier index loop started at 1 and dropped it.
        for record in zip(prices, products, places, times):
            self.edit.insert(Tkinter.END, FIELD_SEPARATOR.join(record) + '\n')


if __name__ == '__main__':
    root = Tkinter.Tk()
    window = Window(root)
    root.minsize(600, 480)
    root.mainloop()


    最新回复(0)