新闻| 文章| 资讯| 行情| 企业| wap手机版| article文章| 首页|会员中心|保存桌面|手机浏览
普通会员

淄博市又菱科技有限公司

企业列表
新闻列表
  • 暂无新闻
推荐企业新闻
联系方式
  • 联系人:汪女士
首页 > 新闻中心 > 批量抓取title keywords description【seo工具】
新闻中心
批量抓取title keywords description【seo工具】
发布时间:2024-11-10        浏览次数:1        返回列表
using System;
using System.Collections;
using System.Collections.Generic;
using System.ComponentModel;
using System.Data;
using System.Drawing;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Windows.Forms;

// Author: CppCoding. Last edited 2011-05-06.
// Signature marker: "yschttl spt". This is the marker used on Yahoo result pages; a
// different search engine will need a different marker. Tested with the queries
// "nike" and "puma"; both produced this marker.
// May currently run slowly.
// Known error causes: network speed and page response time. The target sites are
// foreign, so reading a page's source can fail.
// Ver0.0.2
// Implements export, automatic paging and a configurable number of search pages.
namespace 获取html_控制截取
{
    /// <summary>
    /// Form that walks search-result pages, downloads every linked result page and
    /// collects each page's title, keywords and description into <c>txtEnd</c>.
    /// </summary>
    public partial class Form1 : Form
    {
        // Attribute text that identifies a result link on the search page.
        private const string ResultLinkMarker = "\"yschttl spt\"";

        // Markers that precede the three values extracted from a result page.
        private const string TitleMarker = "<title>";
        private const string KeywordsMarker = "\"Keywords\" content=\"";
        private const string DescriptionMarker = "\"Description\" content=\"";

        // Text that follows / precedes the URL of the "Next" pager link.
        private const string NextLabelMarker = "\">Next &";
        private const string NextHrefMarker = "xt\" href=\"";

        // NOTE(review): the path separators were missing in the scraped source; they are
        // restored here at the only positions that yield a valid Windows path. Confirm.
        private const string ExportPath = @"C:\documents and Settings\Administrator\桌面\获取字符串.doc";

        public Form1()
        {
            InitializeComponent();
        }

        /// <summary>
        /// Downloads the resource at <paramref name="strUrl"/> and returns its body decoded as UTF-8.
        /// </summary>
        /// <param name="strUrl">Absolute URL to request.</param>
        /// <returns>The complete response body.</returns>
        /// <remarks>
        /// Exceptions thrown by <see cref="WebRequest"/> are not caught here. The response,
        /// its stream and the reader are disposed on every path.
        /// </remarks>
        private static string GetStringByUrl(string strUrl)
        {
            WebRequest request = WebRequest.Create(strUrl);
            using (WebResponse response = request.GetResponse())
            using (Stream body = response.GetResponseStream())
            using (StreamReader reader = new StreamReader(body, Encoding.UTF8))
            {
                return reader.ReadToEnd();
            }
        }

        /// <summary>
        /// Downloads one result page, turning the failures expected from slow or
        /// unreachable sites into a <c>false</c> return so that a single bad page
        /// does not abort the whole scan.
        /// </summary>
        /// <param name="url">URL taken from a search-result link.</param>
        /// <param name="content">The page body on success; otherwise <c>null</c>.</param>
        /// <returns><c>true</c> when the page was downloaded.</returns>
        private static bool TryGetStringByUrl(string url, out string content)
        {
            try
            {
                content = GetStringByUrl(url);
                return true;
            }
            catch (WebException ex)
            {
                LogSkippedPage(url, ex);
            }
            catch (IOException ex)
            {
                LogSkippedPage(url, ex);
            }
            catch (UriFormatException ex)
            {
                // The text taken from the link was not a well-formed URL.
                LogSkippedPage(url, ex);
            }
            catch (NotSupportedException ex)
            {
                // The URL uses a scheme WebRequest has no handler for.
                LogSkippedPage(url, ex);
            }

            content = null;
            return false;
        }

        /// <summary>Writes a debug trace line for a result page that could not be downloaded.</summary>
        private static void LogSkippedPage(string url, Exception ex)
        {
            System.Diagnostics.Debug.WriteLine("Skipped " + url + ": " + ex.Message);
        }

        /// <summary>
        /// Returns the text that follows the first occurrence of <paramref name="startMarker"/>
        /// up to the next <paramref name="endMarker"/>. Matching ignores case.
        /// </summary>
        /// <returns>
        /// <c>null</c> when <paramref name="startMarker"/> is absent; the rest of
        /// <paramref name="text"/> when <paramref name="endMarker"/> is absent; otherwise
        /// the enclosed text, which may be empty.
        /// </returns>
        private static string ExtractBetween(string text, string startMarker, string endMarker)
        {
            int start = text.IndexOf(startMarker, StringComparison.OrdinalIgnoreCase);
            if (start < 0)
            {
                return null;
            }

            start += startMarker.Length;
            int end = text.IndexOf(endMarker, start, StringComparison.OrdinalIgnoreCase);
            if (end < 0)
            {
                return text.Substring(start);
            }

            return text.Substring(start, end - start);
        }

        /// <summary>
        /// Finds the URL of the "Next" pager link in a search-result page.
        /// </summary>
        /// <param name="pageHtml">Source of the current search-result page.</param>
        /// <returns>The next page's URL, or <c>null</c> when the page has no "Next" link.</returns>
        private static string FindNextPageUrl(string pageHtml)
        {
            int label = pageHtml.IndexOf(NextLabelMarker, StringComparison.Ordinal);
            if (label < 0)
            {
                return null;
            }

            // The href that belongs to the label is the last one before it.
            string beforeLabel = pageHtml.Substring(0, label);
            int href = beforeLabel.LastIndexOf(NextHrefMarker, StringComparison.Ordinal);
            if (href < 0)
            {
                return null;
            }

            string nextUrl = beforeLabel.Substring(href + NextHrefMarker.Length);
            if (nextUrl.Length == 0)
            {
                return null;
            }

            return nextUrl;
        }

        /// <summary>
        /// Scans up to <c>txtWant</c> search-result pages starting at <c>txt_Url</c> and
        /// appends one "title|keywords|description" line to <c>txtEnd</c> for every result
        /// page that contains all three values.
        /// </summary>
        /// <exception cref="FormatException"><c>txtWant</c> does not contain an integer.</exception>
        /// <exception cref="OverflowException"><c>txtWant</c> is outside the range of <see cref="int"/>.</exception>
        private void button1_Click(object sender, EventArgs e)
        {
            string url = txt_Url.Text;
            int want = Convert.ToInt32(txtWant.Text);
            int j = 0;

            while (j < want && url != null)
            {
                string herf = GetStringByUrl(url);
                string[] splitHerf = herf.Split(new string[] { ResultLinkMarker }, StringSplitOptions.None);

                // Element 0 is the text before the first result link, so start at 1.
                for (int i = 1; i < splitHerf.Length; i++)
                {
                    // The first quoted value after the marker is the link's href.
                    string[] splitWount = splitHerf[i].Split(new string[] { "\"" }, StringSplitOptions.None);
                    if (splitWount.Length < 2)
                    {
                        continue;
                    }

                    string code;
                    if (!TryGetStringByUrl(splitWount[1], out code))
                    {
                        continue;
                    }

                    string title = ExtractBetween(code, TitleMarker, "<");
                    string key = ExtractBetween(code, KeywordsMarker, "\"");
                    string des = ExtractBetween(code, DescriptionMarker, "\"");

                    // Pages missing any of the three values are skipped.
                    if (title == null || key == null || des == null)
                    {
                        continue;
                    }

                    // NOTE(review): the scraped source shows a single space as the record
                    // terminator; a line break is used on the assumption that the original
                    // "\r\n" escape was lost with the other backslashes. Confirm.
                    txtEnd.Text += title + "|" + key + "|" + des + Environment.NewLine;
                }

                j++;
                url = FindNextPageUrl(herf);
            }
        }

        /// <summary>
        /// Appends the current content of <c>txtEnd</c> to the export file as UTF-8.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            using (StreamWriter sw = new StreamWriter(ExportPath, true, Encoding.UTF8))
            {
                sw.WriteLine(txtEnd.Text);
            }
        }
    }
// NOTE(review): the closing brace of the namespace is not present in the visible source,
// so it is not added here. Confirm that it follows this block.