托管至SAE的应用,有时会出现访问流量异常增长的情形。
此时,我们需要对流量异常日期对应的应用访问日志进行分析,从而排查流量异常增长的原因。
一个简单有效的方式就是对访客IP进行统计,因为流量异常很可能是由于少数几个IP在短时间内的大量访问造成的(当然也不排除DDoS攻击)。
要获取SAE的访问日志,可以从SAE的日志面板手工下载,也可以使用SAE的实时日志API,参阅博文:SAE实时日志API Python使用小记。
由于我们只需要使用日志的IP字段,通过SAE实时日志API中fop参数的fields命令可以移除多余的日志字段,从而节省下载流量造成的云豆消耗。
通过下面的Python代码即可实现特定日期的访客IP统计。
在工作目录下创建sae_log_util.py,apibus_handler.py,ip_counter.py并拷贝相应代码,执行ip_counter.py即可。
sae_log_util.py:
#-*-coding: utf8 -*-
#sae_log_util.py
#sae log utility based on sae apibus_handler
#author blog: http://bookshadow.com
#src date: 2015-09-17
# Reason phrases for the HTTP status codes this module knows by name.
status_code_dict = {
    200: 'OK',
    206: 'Partial Content',
    400: 'Bad Request',
    404: 'Not Found',
    500: 'Internal Server Error',
}

# Maps each service name to the log idents defined for it.
service_ident_dict = {
    'http': ['access', 'error', 'alert', 'debug', 'warning', 'notice'],
    'taskqueue': ['error'],
    'cron': ['error'],
    'mail': ['access', 'error'],
    'rdc': ['error', 'warning'],
    'storage': ['access'],
    'push': ['access'],
    'fetchurl': ['access'],
}

# Every log URL starts with this prefix.
_URL_PREFIX = 'http://g.sae.sina.com.cn/log/'
class SaeLogFetcher(object):
    """Download SAE log files over HTTP using an access key / secret key pair.

    Requests are signed by the helpers in ``apibus_handler``. ``requests`` is
    used when it can be imported; otherwise the fetch falls back to urllib2.
    """

    def __init__(self, access_key, secret_key):
        """Store the credentials used to sign every log request."""
        self.access_key = access_key
        self.secret_key = secret_key

    def fetch_log(self, service, date, ident, fop='', version=1):
        """Fetch one log file and return its raw body.

        Args:
            service: a key of ``service_ident_dict`` (for example 'http').
            date: date component of the log URL, e.g. '2015-09-16'.
            ident: log ident; must be listed under ``service`` in
                ``service_ident_dict``.
            fop: optional filter expression sent as the query string.
            version: number placed in front of the ident in the file name.

        Returns:
            The response body. On the ``requests`` path, None is returned
            when the status code is not 200.

        Raises:
            AssertionError: if a key is empty or ``service`` / ``ident`` is
                not listed in ``service_ident_dict``.
            ImportError: if ``apibus_handler`` cannot be imported at all.
            Errors raised by urllib2 (the fallback path) are not caught here.
        """
        assert self.access_key, 'access_key should not be empty'
        assert self.secret_key, 'secret_key should not be empty'
        assert service in service_ident_dict, 'invalid service parameter'
        assert ident in service_ident_dict[service], 'invalid ident parameter'

        url = _URL_PREFIX + service + '/' + date + '/' + str(version) + '-' + ident + '.log'

        # Only the imports decide which transport is used, so only they sit
        # inside the try; SaeApibusAuth is missing when requests is missing.
        try:
            import requests
            from apibus_handler import SaeApibusAuth
        except ImportError:
            requests = None

        if requests is not None:
            response = requests.get(
                url + ('?' + fop if fop else ''),
                auth=SaeApibusAuth(self.access_key, self.secret_key))
            if response.status_code == 200:
                return response.content
            return None

        # requests was not present: fall back to urllib2.
        from apibus_handler import SaeApibusAuthHandler
        import urllib
        import urllib2
        opener = urllib2.build_opener(
            SaeApibusAuthHandler(self.access_key, self.secret_key))
        if fop:
            url += '?' + urllib.quote(fop, safe='')
        response = opener.open(url)
        try:
            return response.read()
        finally:
            # Release the connection even if read() fails.
            response.close()
apibus_handler.py:
#-*-coding: utf8 -*-
"""
SAE API auth handler for urllib2 and requests
urllib2:
>>> import urllib2
>>> apibus_handler = SaeApibusAuthHandler(ACCESSKEY, SECRETKEY)
>>> opener = urllib2.build_opener(apibus_handler)
>>> print opener.open('http://g.sae.sina.com.cn/log/http/2015-06-18/1-access.log').read()
requests:
>>> import requests
>>> print requests.get('http://g.sae.sina.com.cn/log/http/2015-06-18/1-access.log?head/0/10|fields/ /1/2/3/4', auth=SaeApibusAuth(ACCESSKEY, SECRETKEY)).content
"""
import hmac
import base64
import hashlib
import time
import urllib
from urllib2 import BaseHandler, Request
# Only URLs that start with this prefix get signed.
_APIBUS_URL_PREFIX = 'http://g.sae.sina.com.cn/'


class SaeApibusAuthHandler(BaseHandler):
    """urllib2 handler that signs requests sent to the SAE apibus host.

    Requests whose URL does not start with ``_APIBUS_URL_PREFIX`` pass
    through untouched. Matching requests receive the x-sae-timestamp and
    x-sae-accesskey headers plus an Authorization header from ``_signature``.
    """

    # apibus handler must be in front
    handler_order = 100

    def __init__(self, accesskey, secretkey):
        self.accesskey = accesskey
        self.secretkey = secretkey

    def http_request(self, req):
        """Add the SAE headers and signature to ``req``; return ``req``."""
        full_url = req.get_full_url()
        if full_url.startswith(_APIBUS_URL_PREFIX):
            req.headers.update([
                ('x-sae-timestamp', str(int(time.time()))),
                ('x-sae-accesskey', self.accesskey),
            ])
            # The slice starts on the '/' that ends the prefix, so the
            # signed resource is the unquoted path (plus query) of the URL.
            path = urllib.unquote(full_url[len(_APIBUS_URL_PREFIX) - 1:])
            # Only x-sae-* headers are signed, with name and value lowercased.
            signed = []
            for name, value in req.headers.items():
                lowered = name.lower()
                if lowered.startswith('x-sae-'):
                    signed.append((lowered, value.lower()))
            req.add_header(
                'Authorization',
                _signature(self.secretkey, req.get_method(), path, signed))
        return req

    https_request = http_request
try:
    from requests.auth import AuthBase
except ImportError:
    # requests was not present!
    pass
else:
    class SaeApibusAuth(AuthBase):
        """requests auth hook that signs a request for the SAE apibus.

        Calling the instance sets the x-sae-timestamp and x-sae-accesskey
        headers, then an Authorization header built by ``_signature``.
        """

        def __init__(self, accesskey, secretkey):
            self.accesskey = accesskey
            self.secretkey = secretkey

        def __call__(self, r):
            r.headers['x-sae-timestamp'] = str(int(time.time()))
            r.headers['x-sae-accesskey'] = self.accesskey
            # The slice starts on the '/' that ends the prefix; the signed
            # resource is the unquoted remainder of the URL.
            path = urllib.unquote(r.url[len(_APIBUS_URL_PREFIX) - 1:])
            # Only x-sae-* headers are signed, with name and value lowercased.
            signed = [
                (name.lower(), value.lower())
                for name, value in r.headers.items()
                if name.lower().startswith('x-sae-')
            ]
            r.headers['Authorization'] = _signature(
                self.secretkey, r.method, path, signed)
            return r
def _signature(secret, method, resource, headers):
    """Build the value of the Authorization header for one request.

    The signed message is ``method``, ``resource`` and the sorted
    ``name:value`` header lines, joined with newlines. It is signed with
    HMAC-SHA256 keyed by ``secret`` and base64 encoded.

    Args:
        secret: the secret key, as bytes or text.
        method: HTTP method name.
        resource: the resource string to sign.
        headers: iterable of ``(name, value)`` pairs to include.

    Returns:
        A native string: 'SAEV1_HMAC_SHA256 ' followed by the base64 digest.
    """
    header_lines = "\n".join([(k + ":" + v) for k, v in sorted(headers)])
    message = "\n".join([method, resource, header_lines])

    # hmac.new needs byte strings: a text secret or message (unicode on
    # Python 2, str on Python 3) raises TypeError, so encode those first.
    if not isinstance(secret, bytes):
        secret = secret.encode('utf-8')
    if not isinstance(message, bytes):
        message = message.encode('utf-8')

    encoded = base64.b64encode(hmac.new(secret, message, hashlib.sha256).digest())
    # Keep the return a native str wherever b64encode hands back bytes.
    if not isinstance(encoded, str):
        encoded = encoded.decode('ascii')
    return "SAEV1_HMAC_SHA256 " + encoded
ip_counter.py:
#-*-coding: utf8 -*-
#ip_counter.py
#ip counter based on sae_log_util
#author blog: http://bookshadow.com
#src date: 2015-09-17
from collections import Counter
from sae_log_util import SaeLogFetcher
# Which log to analyse.
date = '2015-09-16'
service = 'http'
ident = 'access'
fop = 'fields/ /2'  # fetch ip only
version = 1
ACCESSKEY = '<<ACCESSKEY>>'
SECRETKEY = '<<SECRETKEY>>'


def count_ips(log_text):
    """Count how often each non-blank line occurs in ``log_text``.

    Args:
        log_text: log body with one entry per line; None or an empty
            string is treated as an empty log.

    Returns:
        A list of ``(line, count)`` pairs, most frequent first.
    """
    if not log_text:
        return []
    # splitlines() keeps the last entry even when the text has no trailing
    # newline; blank lines are skipped so they are not counted as an IP.
    stripped = (line.strip() for line in log_text.splitlines())
    return Counter(line for line in stripped if line).most_common()


def main():
    """Fetch the configured log and print one 'ip count' line per IP."""
    import sys

    log_fetcher = SaeLogFetcher(ACCESSKEY, SECRETKEY)
    result = log_fetcher.fetch_log(service, date, ident, fop, version)
    if result is None:
        # The fetcher returns None when the download did not succeed.
        sys.stderr.write('failed to fetch the %s %s log for %s\n'
                         % (service, ident, date))
        sys.exit(1)
    for ip, hits in count_ips(result):
        print('%s %s' % (ip, hits))


if __name__ == '__main__':
    main()
运行代码之前,将代码内的<<ACCESSKEY>>与<<SECRETKEY>>替换为应用的Access Key(公钥)和Secret Key(密钥)。
在命令行下执行:python ip_counter.py > ip_cnt.txt
执行完毕后,IP汇总数据将输出至ip_cnt.txt文件中。
本文链接:http://bookshadow.com/weblog/2015/09/17/sae-log-api-ip-counter/
请尊重作者的劳动成果,转载请注明出处!书影博客保留对文章的所有权利。