标签:encoding service htm int express charset ati pair str
1.html
<%@ Page Language="C#" AutoEventWireup="true" CodeBehind="WebForm1.aspx.cs" Inherits="WebApplication19.WebForm1" %>
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
    <title></title>
</head>
<body>
    <form id="form1" runat="server">
        <div>
            <input name="_method" value="getFxRate" type="hidden"/>
            <input type="date" name="start" />--<input type="date" name="end" />
        </div>
        <input type="submit" />
    </form>
</body>
</html>

2.cs
using Contract.Domain;
using ETLAPP;
using Framework;
using HraWeb.Common;
using System;
using System.Collections;
using System.Collections.Generic;
using System.Data;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Web;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Xml;
using ThreadTemplate;

namespace WebApplication19
{
    /// <summary>
    /// Selects which kind of cell <c>GET_TH_OR_TD_LIST</c> extracts from a table row.
    /// The member name is used verbatim as the HTML tag name.
    /// </summary>
    public enum SearchRange
    {
        th = 0,
        td = 1
    }

    /// <summary>
    /// Parameters describing one page fetch for one currency pair.
    /// </summary>
    class ThreadParameters
    {
        /// <summary>Base search URL, without the <c>&amp;page=</c> suffix.</summary>
        public string Url { get; set; }

        /// <summary>Key into <c>WebForm1.dic</c> identifying the currency pair.</summary>
        public string pairId { get; set; }

        public string Status { get; set; }

        /// <summary>Total number of result pages; filled in when page 1 is fetched.</summary>
        public int PageCount { get; set; }

        /// <summary>1-based index of the page to fetch.</summary>
        public int pageIndex { get; set; }
    }

    /// <summary>
    /// Page that, on a <c>_method=getFxRate</c> request, downloads the rate history
    /// pages for every configured currency pair and persists the parsed rows
    /// through the DAO service.
    /// </summary>
    public partial class WebForm1 : BasePage
    {
        // Rows per result page, used to derive the page count from the record count.
        private const int PageSize = 20;

        // Maximum number of page-fetch threads running at the same time.
        private const int MaxConcurrentThreads = 100;

        // Delay between polls of the running threads, so the coordinator does not spin.
        private const int PollIntervalMs = 50;

        // Cell positions read from each data row.
        private const int RateColumnIndex = 6;
        private const int CaptureDateColumnIndex = 7;

        private const string PagePatern = @"var m_nRecordCount = (.*);";
        private const string DivPatern = @"(?<=<div (.*)?class=""BOC_main publish""[^>]*?>)([\s\S]*?)(?=</div>)";
        private const string TablePatern = @"(?<=<table (.*)?[^>]*?>)([\s\S]*?)(?=</table>)";
        private const string TrPatern = @"(?<=<tr(.*)?[^>]*?>)([\s\S]*?)(?=</tr>)";

        public string MKT;

        private System.Data.DataTable table = null;

        /// <summary>Currency pairs keyed by id; reloaded on every getFxRate request.</summary>
        public static Dictionary<string, BasCurrencyPair> dic = new Dictionary<string, BasCurrencyPair>();

        System.Collections.ArrayList threadList = new System.Collections.ArrayList();

        /// <summary>
        /// Downloads every result page for one currency pair and saves the rows.
        /// Page 1 is fetched synchronously to learn the page count; the remaining
        /// pages are fetched on <c>SubThread</c> workers, at most
        /// <see cref="MaxConcurrentThreads"/> at a time.
        /// </summary>
        /// <param name="para">A <see cref="ThreadParameters"/> carrying the base URL and pair id.</param>
        /// <returns>Always an empty string.</returns>
        private object InitThread(object para)
        {
            var obj = (ThreadParameters)para;
            obj.pageIndex = 1;

            if (table == null)
            {
                table = new System.Data.DataTable();
                table.Columns.Add(new System.Data.DataColumn() { ColumnName = "CURRENCY_PAIR", DataType = typeof(Int64) });
                table.Columns.Add(new System.Data.DataColumn() { ColumnName = "ID", DataType = typeof(Int64) });
                table.Columns.Add(new System.Data.DataColumn() { ColumnName = "FX_RATE", DataType = typeof(decimal) });
                table.Columns.Add(new System.Data.DataColumn() { ColumnName = "CAPTURE_DATE", DataType = typeof(DateTime) });
            }

            // Page 1 is fetched here, which also fills obj.PageCount. Its rows are
            // saved explicitly: there is no SubThread to carry them back.
            StartSave(FetchPageRows(obj));
            int pageCount = obj.PageCount;

            // Threads currently running, keyed by page index.
            var runingHt = new Dictionary<int, SubThread>();
            // Threads waiting for a free slot, in page order.
            var unRunHt = new Queue<KeyValuePair<int, SubThread>>();

            // Pages 2..pageCount inclusive; page 1 is already done.
            for (int i = 2; i <= pageCount; i++)
            {
                // Each worker gets its own parameter object, so neither the loop
                // variable nor a shared pageIndex/Url is captured by the lambda.
                var pageParam = new ThreadParameters
                {
                    Url = obj.Url,
                    pairId = obj.pairId,
                    pageIndex = i
                };

                SubThread th = new SubThread(i);
                th.ThreadAction = a =>
                {
                    getData(pageParam, th);
                    return "";
                };

                if (runingHt.Count < MaxConcurrentThreads)
                {
                    runingHt.Add(i, th);
                    th.Start();
                }
                else
                {
                    unRunHt.Enqueue(new KeyValuePair<int, SubThread>(i, th));
                }
            }

            // Loop until every worker has finished and nothing is left waiting.
            while (runingHt.Count > 0 || unRunHt.Count > 0)
            {
                // Ids of the threads that have stopped since the last poll.
                List<int> stepFinishList = runingHt
                    .Where(pair => pair.Value.IsStopped)
                    .Select(pair => pair.Key)
                    .ToList();

                foreach (int tid in stepFinishList)
                {
                    // 1. Persist the finished thread's rows.
                    // 2. Remove it from the running set.
                    // 3. Start the next waiting thread, if any, in the freed slot.
                    StartSave(runingHt[tid].ReturnObj as ArrayList);
                    runingHt.Remove(tid);

                    if (unRunHt.Count > 0)
                    {
                        KeyValuePair<int, SubThread> next = unRunHt.Dequeue();
                        next.Value.Start();
                        runingHt.Add(next.Key, next.Value);
                    }
                }

                if (stepFinishList.Count == 0)
                {
                    // Nothing finished this round: wait instead of busy-spinning.
                    Thread.Sleep(PollIntervalMs);
                }
            }

            return "";
        }

        /// <summary>
        /// Fetches one page and hands the parsed rows to <paramref name="subThread"/>.
        /// </summary>
        /// <param name="para">A <see cref="ThreadParameters"/> naming the page to fetch.</param>
        /// <param name="subThread">Worker that receives the rows in <c>ReturnObj</c>; may be null.</param>
        /// <returns>Always the string "子完成".</returns>
        private object getData(object para, SubThread subThread)
        {
            var obj = (ThreadParameters)para;
            ArrayList list = FetchPageRows(obj);

            if (subThread != null)
            {
                subThread.ReturnObj = list;
            }

            obj.Status = "完成";
            return "子完成";
        }

        /// <summary>
        /// Downloads the page named by <paramref name="obj"/> and converts its data
        /// rows into <c>BasFxRate</c> objects. When the page index is 1 it also sets
        /// <c>obj.PageCount</c>. <c>obj.Url</c> is not modified.
        /// </summary>
        private ArrayList FetchPageRows(ThreadParameters obj)
        {
            // Built locally so repeated calls do not accumulate "&page=" suffixes.
            string pageUrl = obj.Url + "&page=" + obj.pageIndex;

            string content;
            using (var wc = new WebClient())
            using (Stream stream = wc.OpenRead(pageUrl))
            using (var sr = new StreamReader(stream, Encoding.UTF8))
            {
                content = sr.ReadToEnd();
            }

            if (obj.pageIndex == 1)
            {
                obj.PageCount = ParsePageCount(content);
            }

            List<string> trList = ExtractTableRows(content);
            var list = new ArrayList();

            // Row 0 is the header. The last row is skipped exactly as before;
            // NOTE(review): presumably a non-data footer row — confirm against the page.
            for (int i = 1; i < trList.Count - 1; i++)
            {
                List<string> cells = GET_TH_OR_TD_LIST(SearchRange.td, trList[i]);
                if (cells.Count <= CaptureDateColumnIndex)
                {
                    // Not a full data row; skip it rather than fail on a missing cell.
                    continue;
                }

                BasFxRate row = new BasFxRate();
                row.CurrencyPair = dic[obj.pairId];
                row.FxRate = decimal.Parse(
                    cells[RateColumnIndex],
                    System.Globalization.CultureInfo.InvariantCulture);

                // TryParse resets its out value to DateTime.MinValue on failure,
                // so the fallback to today must be applied afterwards.
                DateTime captured;
                if (!DateTime.TryParse(cells[CaptureDateColumnIndex], out captured))
                {
                    captured = DateTime.Now.Date;
                }

                row.CaptureDate = captured;
                list.Add(row);
            }

            return list;
        }

        /// <summary>
        /// Reads the total record count from the page script and converts it to a
        /// page count. Returns 0 when the count is missing or not a positive integer.
        /// </summary>
        private static int ParsePageCount(string content)
        {
            Match pageMatch = Regex.Match(content, PagePatern);
            int rows;
            if (!pageMatch.Success || !int.TryParse(pageMatch.Groups[1].Value, out rows) || rows <= 0)
            {
                return 0;
            }

            return (rows + PageSize - 1) / PageSize;
        }

        /// <summary>
        /// Returns the inner HTML of every &lt;tr&gt; in the first table of the
        /// first "BOC_main publish" div, or an empty list if any level is missing.
        /// </summary>
        private static List<string> ExtractTableRows(string content)
        {
            string divContent = FirstMatchOrEmpty(content, DivPatern);
            string tableContent = FirstMatchOrEmpty(divContent, TablePatern);

            var rows = new List<string>();
            foreach (Match match in Regex.Matches(tableContent, TrPatern))
            {
                rows.Add(match.Groups[0].Value);
            }

            return rows;
        }

        /// <summary>Returns the first match of <paramref name="pattern"/>, or an empty string.</summary>
        private static string FirstMatchOrEmpty(string input, string pattern)
        {
            Match match = Regex.Match(input, pattern);
            return match.Success ? match.Groups[0].Value : string.Empty;
        }

        /// <summary>
        /// Starts a thread that persists <paramref name="rows"/>; does nothing for a
        /// null or empty list.
        /// </summary>
        private void StartSave(ArrayList rows)
        {
            if (rows == null || rows.Count == 0)
            {
                return;
            }

            Thread saver = new Thread(new ParameterizedThreadStart(saveorupdate));
            saver.Start(rows);
        }

        /// <summary>Saves or updates all rows in <paramref name="obj"/> (an <see cref="ArrayList"/>).</summary>
        private void saveorupdate(object obj)
        {
            var Dao = (Contract.IService.IDaoService)ctx["DaoService"];
            Dao.SaveOrUpdateAll((ArrayList)obj);
        }

        /// <summary>
        /// Extracts the inner content of every header or data cell in one table row.
        /// </summary>
        /// <param name="range">Whether to collect &lt;th&gt; or &lt;td&gt; cells.</param>
        /// <param name="row">Inner HTML of a single &lt;tr&gt;.</param>
        /// <returns>Cell contents in document order; empty if none match.</returns>
        private List<string> GET_TH_OR_TD_LIST(SearchRange range, string row)
        {
            string tag = range.ToString();
            string tdPatern = $@"(?<=(<{tag}[^>]*?>))(?<tdCell>[\s\S]*?)(?=</{tag}>)";

            var contentList = new List<string>();
            foreach (Match match in Regex.Matches(row, tdPatern))
            {
                contentList.Add(match.Groups["tdCell"].Value);
            }

            return contentList;
        }

        /// <summary>
        /// Handles the getFxRate form post: validates the date range, loads the
        /// currency pairs and web codes, and scrapes each pair in turn.
        /// </summary>
        protected void Page_Load(object sender, EventArgs e)
        {
            if (Request["_method"] != "getFxRate")
            {
                return;
            }

            // Validate before touching the database.
            var start = Request["start"];
            var end = Request["end"];
            if (string.IsNullOrEmpty(start) || string.IsNullOrEmpty(end))
            {
                throw new ArgumentException(":输个日期,我的哥!");
            }

            info = new Framework.QueryInfo() { CustomSQL = "select * from BASE_CURRENCY_WEB" };
            var Dao = (Contract.IService.IDaoService)ctx["DaoService"];
            dic = Dao.FindList<BasCurrencyPair>(new QueryInfo() { QueryObject = "BasCurrencyPair" }).ToDictionary(x => x.Id);
            var ds = Dao.ExcuteDataSet(info);

            foreach (DataRow row in ds.Tables[0].Rows)
            {
                var pairId = row["CURRENCY_PAIR_ID"].ToString();

                // Request values and the web code are escaped before going into the query string.
                var url = string.Format(
                    "http://srh.bankofchina.com/search/whpj/search.jsp?erectDate={0}&nothing={1}&pjname={2}",
                    Uri.EscapeDataString(start),
                    Uri.EscapeDataString(end),
                    Uri.EscapeDataString(row["Web_Code"].ToString()));

                var param = new ThreadParameters() { pairId = pairId, Url = url };
                InitThread(param);
            }
        }
    }
}

3.脚本
CREATE OR REPLACE TRIGGER Bas_Fx_Rate_TRI -- table name + "_" + "TRI"
BEFORE INSERT ON Bas_Fx_Rate -- table name
FOR EACH ROW
BEGIN
    SELECT BAS_MODULE_SEQ.NEXTVAL INTO :NEW.ID -- primary key ID column name
    FROM DUAL;
END;
标签:encoding service htm int express charset ati pair str
原文地址:http://www.cnblogs.com/kexb/p/6045178.html