CODE:
#!/usr/bin/python
# -*- coding: utf-8 -*-
'''
Created on 2014-8-26
@author: guaguastd
@name: linkedin_network_clusters.py

Geocode the locations of saved LinkedIn connections, cluster the resulting
coordinates with k-means and write the placemarks plus the cluster centroids
to a KML file.
'''
import os
import sys
import json
from urllib2 import HTTPError

from cluster import KMeansClustering, centroid

# createKML is a helper that munges data and builds up an XML tree.
# NOTE(review): the directory appended here is presumably where mykml lives;
# confirm that joining the cwd with "e:" resolves as intended on the target box.
sys.path.append(os.path.join(os.getcwd(), "e:", "eclipse", "LinkedIn", "dFile"))
from mykml import createKML

# get geo code
from geo import geo_from_bing

# Number of clusters requested from KMeansClustering.getclusters()
K = 3

# Number of HTTP errors tolerated for a single location before giving up
MAX_HTTP_ERRORS = 3

# Raw strings keep the Windows backslashes literal instead of relying on
# "\e", "\L", "\d" and "\l" not being recognised escape sequences.
CONNECTIONS_DATA = r'E:\eclipse\LinkedIn\dfile\linkedin_connections.json'
OUT_FILE = r"E:\eclipse\LinkedIn\dfile\linkedin_clusters_kmeans.kml"


def _geocode_with_retry(geocoder, query, max_errors=MAX_HTTP_ERRORS):
    '''Geocode *query*, retrying when an HTTPError is raised.

    Returns whatever ``geocoder.geocode(query, exactly_one=False)`` returns.
    Calls ``sys.exit()`` once *max_errors* HTTP errors have been seen for
    this query.
    '''
    # The counter must be initialised outside the loop; resetting it on every
    # attempt would make the limit unreachable and retry forever.
    num_errors = 0
    while True:
        try:
            return geocoder.geocode(query, exactly_one=False)
        except HTTPError as e:
            num_errors += 1
            if num_errors >= max_errors:
                sys.exit()
            sys.stderr.write('%s\n' % e)
            sys.stderr.write('Encountered an urllib2 error. Trying again...\n')


g = geo_from_bing()

# Open up your saved connections with extended profile information
# or fetch them again from LinkedIn if you prefer
with open(CONNECTIONS_DATA) as connections_file:
    connections = json.loads(connections_file.read())['values']

# Only connections that carry a 'location' entry contribute a location name
locations = [c['location']['name'] for c in connections if 'location' in c]

# Some basic transforms
transforms = [('Greater ', ''), (' Area', '')]

# Step 1 - Tally the frequency of each location

# Maps location name -> [coordinates of the first geocoding result, count]
coords_freqs = {}
# Location names whose lookup produced no result; remembered so that the
# same failing request is not repeated for every connection sharing the name
unresolved = set()

for location in locations:

    # Avoid unnecessary I/O and geo requests by building up a cache
    if location in coords_freqs:
        coords_freqs[location][1] += 1
        continue
    if location in unresolved:
        continue

    transformed_location = location
    for transform in transforms:
        transformed_location = transformed_location.replace(*transform)

    # Handle potential IO errors with a retry pattern...
    results = _geocode_with_retry(g, transformed_location)
    print(results)

    # Disambiguation strategy is "pick first"; a None or empty result set
    # yields no entry for this location.
    first_result = next(iter(results or ()), None)
    if first_result is None:
        unresolved.add(location)
        continue

    # Each result is of the form ("Description", (X,Y))
    coords_freqs[location] = [first_result[1], 1]

# Step 2 - Build up data structure for converting locations to KML

expanded_coords = []
for label in coords_freqs:
    # Flip lat/lon for Google Earth
    (lat, lon), freq = coords_freqs[label]
    expanded_coords.append((label, [(lon, lat)] * freq))

# No need to clutter the map with unnecessary placemarks...
kml_items = [{'label': label, 'coords': '%s,%s' % coords[0]}
             for (label, coords) in expanded_coords]

# It would also be helpful to include names of your contacts on the map.
# Group the connections by location name once instead of rescanning the
# whole connection list for every placemark.
contacts_by_location = {}
for connection in connections:
    if 'location' in connection:
        contacts_by_location.setdefault(
            connection['location']['name'], []).append(connection)

for item in kml_items:
    item['contacts'] = '\n'.join(
        ['%s %s.' % (contact['firstName'], contact['lastName'])
         for contact in contacts_by_location.get(item['label'], [])])

# Step 3 - Cluster locations and extend the KML data structure with centroids

c1 = KMeansClustering([coords
                       for (label, coords_list) in expanded_coords
                       for coords in coords_list])

centroids = [{'label': 'CONTROID', 'coords': '%s,%s' % centroid(c)}
             for c in c1.getclusters(K)]

kml_items.extend(centroids)

# Step 4 - Create the final KML output and write it to a file

kml = createKML(kml_items)

with open(OUT_FILE, 'w') as out_file:
    out_file.write(kml)

print('Data written to ' + OUT_FILE)
[Location(Beijing, Beijing, China 39 54m 0.0s N, 116 23m 0.0s E)] [Location(Beijing, Beijing, China 39 54m 0.0s N, 116 23m 0.0s E)] None [Location(CA, United States 37 43m 0.0s N, 122 15m 0.0s W)] [Location(Birmingham, England, United Kingdom 52 29m 0.0s N, 1 55m 0.0s W), Location(Birmingham, England, United Kingdom 52 27m 0.0s N, 1 43m 0.0s W), Location(Birmingham Airport, England, United Kingdom 52 27m 0.0s N, 1 44m 0.0s W), Location(Birmingham Business Park, England, United Kingdom 52 28m 0.0s N, 1 43m 0.0s W)] [Location(Birmingham, England, United Kingdom 52 29m 0.0s N, 1 55m 0.0s W), Location(Birmingham, England, United Kingdom 52 27m 0.0s N, 1 43m 0.0s W), Location(Birmingham Airport, England, United Kingdom 52 27m 0.0s N, 1 44m 0.0s W), Location(Birmingham Business Park, England, United Kingdom 52 28m 0.0s N, 1 43m 0.0s W)] [Location(China 36 33m 0.0s N, 103 59m 0.0s E)] [Location(China 36 33m 0.0s N, 103 59m 0.0s E)] [Location(Chengdu, Sichuan, China 30 40m 0.0s N, 104 5m 0.0s E)] [Location(Chengdu, Sichuan, China 30 40m 0.0s N, 104 5m 0.0s E)] [Location(Xingtai, Hebei, China 37 4m 0.0s N, 114 29m 0.0s E)] [Location(Xingtai, Hebei, China 37 4m 0.0s N, 114 29m 0.0s E)] [Location(United States 39 27m 0.0s N, 98 57m 0.0s W)] [Location(United States 39 27m 0.0s N, 98 57m 0.0s W)] [Location(Foshan, Guangdong, China 23 2m 0.0s N, 113 6m 0.0s E)] [Location(Foshan, Guangdong, China 23 2m 0.0s N, 113 6m 0.0s E)] Data written to E:\eclipse\LinkedIn\dfile\linkedin_clusters_kmeans.kml
原文地址:http://blog.csdn.net/guaguastd/article/details/38842031