python 및 머신러닝 교육, 슬로우캠퍼스


  • Google App Engine(GAE)의 urlfetch를 이용하여 URL 페이지 읽어오기
  • Python 기본 패키지인 urllib2를 이용하여 URL 페이지 읽어오기




def get_url(word):
    """Fetch the page for *word*, preferring GAE urlfetch over urllib2.

    Tries ``get_url_gae`` first; if that raises for any reason (for
    example the App Engine SDK is not importable outside GAE), falls
    back to ``get_url_local``.

    Args:
        word: Value substituted into the request URL path.

    Returns:
        The page body as returned by whichever fetcher succeeded.
    """
    try:
        return get_url_gae(word)
    except Exception:
        # Best-effort fallback. `Exception` (not a bare except) so that
        # KeyboardInterrupt / SystemExit still propagate.
        # The original called an undefined `get_wiktion_xml_local`; the
        # local fetcher defined alongside this function is get_url_local.
        return get_url_local(word)

def get_url_gae(word):
    """Fetch the page for *word* using the Google App Engine urlfetch API.

    Args:
        word: Value substituted into the request URL path.

    Returns:
        The response body decoded as UTF-8 when the HTTP status is 200,
        otherwise an empty string.

    Raises:
        ImportError: If the App Engine SDK is not available.
    """
    # Imported lazily so this module can still be loaded outside GAE.
    from google.appengine.api import urlfetch

    url = 'http://abc.com/%s' % (word)
    # Changing User-Agent
    # https://developers.google.com/appengine/docs/python/urlfetch/#Request_Headers
    # http://stackoverflow.com/questions/2743521/how-to-change-user-agent-on-google-app-engine-urlfetch-service
    # `headers` must be a dict; the original used a comma instead of a
    # colon, which built a set literal rather than a header mapping.
    result = urlfetch.fetch(
        url=url,
        headers={"User-agent": "jjjj/0.1 (2001-07-14)"},
    )
    if result.status_code == 200:
        return result.content.decode('utf-8')
    return ''

def get_url_local(word):
    """Fetch the page for *word* using the standard-library urllib2.

    Args:
        word: Value substituted into the request URL path.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib2.URLError: Propagated from ``urllib2.urlopen`` on failure.
    """
    import urllib2

    url = 'http://abc.com/%s' % (word)
    req = urllib2.Request(url)
    req.add_header("User-agent", "jjjj/0.1 (2001-07-14)")
    fp = urllib2.urlopen(req)
    try:
        return fp.read().decode('utf-8')
    finally:
        # Always release the underlying connection, even if read/decode
        # raises; the original left the response open.
        fp.close()



WRITTEN BY
manager@
Data Analysis, Text/Knowledge Mining, Python, Cloud Computing, Platform

,