try { for (int i = 1; i < 130; i++) { var html = GetHtmls("http://bbs.fobshanghai.com/viewthread.php?tid=3885995&extra=&page="+i,"","","gbk"); var ms = Regex.Matches(html, @"
看到这个帖子不错:http://bbs.fobshanghai.com/viewthread.php?tid=3885995&extra=&page=1
于是写了一段代码进行采集,看着方便多了。
下面是 365 的采集代码:
// Scrapes pages 1..35 of forum thread 536585 and appends the selected posts to "365.html".
// GetHtmls is a helper defined elsewhere; here it is given the page URL and its result is
// used as the page's HTML text.
// NOTE(review): both regex patterns below look like they lost their HTML-tag parts when this
// snippet was published (a pattern ending in a lazy "+?" group captures a single character).
// They are reproduced unchanged; restore the original tags before relying on the output.
try
{
    for (int i = 1; i < 36; i++)
    {
        var html = GetHtmls("http://xxxxxxx/thread-536585-" + i + "-1.html");
        var ms = Regex.Matches(html, @"[\s\S]+?id\=""msg"">([\s\S]+?)");

        // Page heading written once per page, before that page's posts.
        // The line break is an explicit escape: a raw newline inside a regular
        // string literal does not compile.
        File.AppendAllText("365.html", string.Format(" 第{0}页\n", i));

        foreach (Match m in ms)
        {
            var temp = m.Groups[0].Value;

            // Only matches containing this marker are kept; everything else is skipped.
            // NOTE(review): presumably the marker identifies one poster — confirm.
            if (!temp.Contains("5>yswgxx"))
            {
                continue;
            }

            // Prefix captured from the whole match, followed by the message body capture.
            var m1 = Regex.Match(temp, @"( [\s\S]+?)");
            var str = m1.Groups[1].Value + m.Groups[1].Value;

            // Strip the mobile-app signature before wrapping the post in a paragraph.
            File.AppendAllText("365.html", "<p>" + str.Replace("<font color=#A7CF7A><i><b>------ 发表于安卓手机365App</b></i></font>", "") + "</p>");
        }
    }

    MessageBox.Show("over");
}
catch (Exception ex)
{
    // Any failure (network, regex, file I/O) aborts the run and is shown to the user.
    MessageBox.Show(ex.Message);
}