码迷,mamicode.com
首页 > 其他好文 > 详细

Mytophome Deal

时间:2015-02-04 18:20:53      阅读:175      评论:0      收藏:0      [点我收藏+]

标签:

using AnfleCrawler.Common;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AnfleCrawler.DataAnalyzer
{
    /// <summary>
    /// Analyzer for Mytophome deal-record pages. For every listing row it makes
    /// sure the referenced estate ("houses") is stored, then stores the listing
    /// itself through the repository.
    /// </summary>
    internal class Mytophome : AnalyzerBase
    {
        /// <summary>
        /// Smallest number of span cells a listing row must have. The row
        /// parser reads cells 1 through 4 at minimum.
        /// </summary>
        private const int MinListingCells = 5;

        /// <summary>
        /// Number of span cells in a row that carries an extra building-name
        /// cell at index 2.
        /// </summary>
        private const int CellsWithBuildingName = 6;

        /// <summary>
        /// Analyzes a landed page. Only depth 0 is handled: the paging
        /// container is registered via <c>DoPerPaging</c>, then each
        /// ".deD_ctt li" row is turned into a <c>HouselistingEntity</c>.
        /// Rows that are malformed, lack an estate id, or whose estate page
        /// raises <c>HtmlNodeMissingException</c> are skipped instead of
        /// aborting the whole page.
        /// </summary>
        /// <param name="current">The page currently being analyzed.</param>
        protected override void AnalyzeInternal(PageLandEntity current)
        {
            var lander = Crawler.Lander;
            var pHandler = CreateContentHandler(current);
            switch (current.Depth)
            {
                case 0:
                    {
                        var dom = lander.GetDocument(pHandler);
                        // Give the parent of the <nobr> element a known id so it can be
                        // handed to DoPerPaging as an id selector.
                        var nextNode = QueryNode(dom.DocumentNode, "nobr").ParentNode;
                        nextNode.SetAttributeValue("id", PagingHack);
                        DoPerPaging(current, dom.DocumentNode, string.Format("#{0}", PagingHack));

                        foreach (var node in QueryNodes(dom.DocumentNode, ".deD_ctt li"))
                        {
                            var cells = QueryNodes(node, "span").ToArray();
                            if (cells.Length < MinListingCells)
                            {
                                // Too few cells to index safely; skip this row rather than
                                // throwing and losing every remaining row on the page.
                                Crawler.OutWrite("Skipped listing row with {0} cells on {1}", cells.Length, current.Url);
                                continue;
                            }

                            // Keep the link found on the page separate from the derived estate
                            // URL so that both can be reported if the estate check fails.
                            var listingUrl = GetHref(QueryNode(cells[1], "a"), current.Url);
                            var query = System.Web.HttpUtility.ParseQueryString(listingUrl.Query);
                            string shid = query["estateId"];
                            if (string.IsNullOrEmpty(shid))
                            {
                                // Without an estate id the detail URL would be ".../wiki//detail.html".
                                Crawler.OutWrite("Skipped listing row without estateId: {0}", listingUrl);
                                continue;
                            }

                            var housesUrl = new Uri(string.Format("http://{0}/wiki/{1}/detail.html", listingUrl.Authority, shid));
                            Guid housesID;
                            try
                            {
                                CheckHouses(housesUrl, out housesID);
                            }
                            catch (HtmlNodeMissingException ex)
                            {
                                App.LogError(ex, "OrgUrl={0} HousesUrl={1}", listingUrl, housesUrl);
                                continue;
                            }

                            var vals = cells.Select(p => p.InnerText.HtmlTrim()).ToArray();

                            // NOTE(review): the date is parsed from the last cell, which is the
                            // same cell stored below as the unit price - confirm which column
                            // of the page really holds the transaction date.
                            DateTime? transactionDate = null;
                            DateTime parsed;
                            if (DateTime.TryParse(vals[vals.Length - 1], out parsed))
                            {
                                transactionDate = parsed;
                            }

                            // A six-cell row has a building name at index 2, which shifts the
                            // area / price / unit-price cells one position to the right.
                            bool hasBuildingName = vals.Length == CellsWithBuildingName;
                            int first = hasBuildingName ? 3 : 2;
                            var listing = new HouselistingEntity()
                            {
                                HousesID = housesID,
                                TransactionDate = transactionDate,
                                Area = string.Format("{0}平方", vals[first]),
                                SoldPriceOrRent = string.Format("{0}万", vals[first + 1]),
                                UnitPriceOrLease = string.Format("{0}元/平方", vals[first + 2]),
                            };
                            if (hasBuildingName)
                            {
                                listing.BuildingName = vals[2];
                            }
                            Repository.SaveHouselisting(listing);
                            Crawler.OutWrite("保存小区出售记录 {0}", housesID);
                        }
                    }
                    break;
            }
        }

        /// <summary>
        /// Ensures the estate behind <paramref name="housesUrl"/> is stored and
        /// returns its key. The key is derived from the URL; when the stored
        /// record already has a <c>SiteID</c> nothing else happens. Otherwise
        /// the detail page is loaded, its ".xxjs_rbar_ct li" texts are mapped
        /// onto the entity, and the entity is saved.
        /// </summary>
        /// <param name="housesUrl">URL of the estate detail page.</param>
        /// <param name="housesID">Receives the hash key generated from <paramref name="housesUrl"/>.</param>
        private void CheckHouses(Uri housesUrl, out Guid housesID)
        {
            // Look the estate up first, so an estate that is already stored does not
            // cost a page download for every listing row that refers to it.
            housesID = GenHashKey(housesUrl.OriginalString);
            var bo = Crawler.Repository.LoadHouses(housesID);
            if (!string.IsNullOrEmpty(bo.SiteID))
            {
                return;
            }

            var pHandler = CreateContentHandler(new PageLandEntity()
            {
                Url = housesUrl,
                Depth = DataDepth.Houses
            });
            pHandler.AjaxBlocks.Add(HACK);
            var dom = Crawler.Lander.GetDocument(pHandler);
            var attrs = new AttributeFiller();
            attrs.Append(QueryTexts(dom.DocumentNode, ".xxjs_rbar_ct li"));

            bo.SiteID = "Mytophome.com";
            bo.PageUrl = housesUrl.OriginalString;
            bo.CityName = Crawler.Config.CityName;
            // The dictionary pairs the labels used on the page with the labels passed to FillEntity.
            attrs.FillEntity(bo, new Dictionary<string, string>()
            {
                {"楼盘名称", "小区名称"},
                {"楼盘地址", "小区地址"},
                {"发展商", "开发商"},
                {"物管公司", "物业公司"},
                {"物管电话", "物业办公电话"},
            });
            MapMark(bo);
            Crawler.Repository.Save(bo);
            Crawler.OutWrite("保存楼盘 {0}", bo.小区名称);
        }
    }
}

 

Mytophome Deal

标签:

原文地址:http://www.cnblogs.com/Googler/p/4272703.html

(0)
(0)
   
举报
评论 一句话评论(0)
登录后才能评论!
© 2014 mamicode.com 版权所有  联系我们:gaon5@hotmail.com
迷上了代码!