原理:http://my.oschina.net/u/1458120/blog/545224
bitmap包:https://github.com/Doist/bitmapist
要求 redis >= 2.6.0
bitmapist-master.zip 安裝:自己下載解壓後執行 python setup.py install
(因爲包內默認連接本機 redis 的 6379 端口,如果想要修改,把下載的包放到自己的項目中,修改 __init__.py 中的
SYSTEMS = { 'default': redis.Redis(host='192.168.1.3', port=6379) }
)
不能運行在集羣模式下。使用時最好用 keys * 查看存過哪些 key,並爲它們設置過期時間
事件標記默認同時標記天、周、月
只標記天可修改 __init__.py 中的 _mark() 函數
# obj_classes = [MonthEvents, WeekEvents, DayEvents]
# obj_classes = [DayEvents]
推薦閱讀:
http://www.zhihu.com/question/21581696
(cohort analysis 同期羣分析)感覺 bitmapist 的 cohort 講的就是這個
聲明:本文的測試大部分都是開源項目中的原測試加上自己的一些理解,讀者可以直接看源文
# -*- coding: utf-8 -*- # from builtins import range import os import traceback import subprocess import atexit import socket import time import pytest from bitmapist.cohort import get_dates_data from bitmapist import setup_redis, delete_all_events from datetime import datetime, timedelta from bitmapist import mark_event, unmark_event,\ MonthEvents, WeekEvents, DayEvents, HourEvents,\ BitOpAnd, BitOpOr, get_event_names from bitmapist.cohort import get_dates_data ################################################################## #下面是啓動redis服務 若是已經啓動可忽略 def redis_server(): """ Fixture starting the Redis server """ redis_host = '192.168.15.100' redis_port = 6379 if is_socket_open(redis_host, redis_port): yield None else: proc = start_redis_server(redis_port) wait_for_socket(redis_host, redis_port) yield proc proc.terminate() def setup_redis_for_bitmapist(): setup_redis('default', '192.168.15.100', 6379) setup_redis('default_copy', 'l92.168.8.101', 6380) def start_redis_server(port): """ Helper function starting Redis server """ devzero = open(os.devnull, 'r') devnull = open(os.devnull, 'w') #查看本身的redis-serverm命令 proc = subprocess.Popen(['/usr/local/redis/bin/redis-server', '--port', str(port)], stdin=devzero, stdout=devnull, stderr=devnull, close_fds=True)#close_fds關閉子進程 atexit.register(lambda: proc.terminate()) return proc def is_socket_open(host, port): """ Helper function which tests is the socket open """ sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.settimeout(0.1) return sock.connect_ex((host, port)) == 0 def wait_for_socket(host, port, seconds=3): """ Check if socket is up for :param:`seconds` sec, raise an error otherwise """ polling_interval = 0.1 iterations = int(seconds / polling_interval) sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.settimeout(0.1) for _ in range(iterations): result = sock.connect_ex((host, port)) if result == 0: sock.close() break time.sleep(polling_interval) else: raise RuntimeError('Service at %s:%d is 
unreachable' % (host, port)) ######################################################################################## def base_test(): try: setup_redis_for_bitmapist() redis_server() #mark_event的兩個重要參數now=None, track_hourly=None, # track_hourly =True標記用戶小時活躍 默認只標記到月周天 #now=None時指如今時間 不然 指定到特定的用戶活躍時間 mark_event('active', 123, track_hourly=True) now = datetime.utcnow() unmark_event('active', 124, track_hourly=True)#取消標記 print MonthEvents('active', now.year, now.month).has_events_marked() print set(get_event_names(batch=2))#獲取全部的事件名稱set(["active"]) print set(get_event_names(prefix='b', batch=2))#獲取全部以b開頭的事件名稱set()set([]) # Month assert 123 in MonthEvents('active', now.year, now.month) assert 124 not in MonthEvents('active', now.year, now.month) # Week assert 123 in WeekEvents('active', now.year, now.isocalendar()[1]) assert 124 not in WeekEvents('active', now.year, now.isocalendar()[1]) # Day assert 123 in DayEvents('active', now.year, now.month, now.day) assert 124 not in DayEvents('active', now.year, now.month, now.day) # Hour assert 123 in HourEvents('active', now.year, now.month, now.day, now.hour) assert 124 not in HourEvents('active', now.year, now.month, now.day, now.hour) assert 124 not in HourEvents('active', now.year, now.month, now.day, now.hour-1) mark_event("active",124) #標記用戶125本月活躍 mark_event('active', 125) assert 125 in MonthEvents('active', now.year, now.month) #取消用戶125本月活躍 unmark_event('active', 125) assert 125 not in MonthEvents('active', now.year, now.month) yesterday = now - timedelta(days=1) mark_event('active', 126, now=now) mark_event('active', 127, now=yesterday)#把用戶活躍標記到昨天的月周天上 #獲取活躍的用戶數 提示錯誤沒有bitcount命令 要求redis>= 2.6.0 info查看redis版本 print MonthEvents('active', now.year, now.month).get_count()#4 print list(WeekEvents('active', now.year, now.isocalendar()[1]))# [123, 124, 126, 127] print list(MonthEvents('active', now.year, now.month))#[123, 124, 126, 127] ev = DayEvents('active', now.year, now.month,now.day) # [123, 124, 126] print list(ev)#3 
print len(ev)#3 print list(DayEvents('active',yesterday.year,yesterday.month,yesterday.day))#[127] last_month = datetime.utcnow() - timedelta(days=30) ago3_month = datetime.utcnow() - timedelta(days=60) # 123,127 128上月活躍,127 上上月活躍 mark_event('active', 127, now=ago3_month) mark_event('active', 123, now=last_month) mark_event('active', 127, now=last_month) mark_event('active', 128, now=last_month) # 近三個月都活躍的用戶(交集) active_3_months = BitOpAnd( MonthEvents('active', ago3_month.year, ago3_month.month), MonthEvents('active', last_month.year, last_month.month), MonthEvents('active', now.year, now.month) ) print list(active_3_months)#[127] active_3_months.delete() #嵌套查詢 active_3_months = BitOpAnd( MonthEvents('active', ago3_month.year, ago3_month.month), MonthEvents('active', last_month.year, last_month.month), BitOpAnd( MonthEvents('active', now.year, now.month)) ) print list(active_3_months)#[127] # 近三個月活躍過的用戶(並集) print list(BitOpOr( MonthEvents('active', ago3_month.year, ago3_month.month), MonthEvents('active', last_month.year, last_month.month), MonthEvents('active', now.year, now.month)))#[123, 124, 126, 127, 128] #用備份redis查詢 active_2_months = BitOpAnd( 'default_copy', MonthEvents('active', last_month.year, last_month.month), MonthEvents('active', now.year, now.month) ) print list(active_2_months) active_2_months.delete() delete_all_events()#清理全部的bitmap except: traceback.print_exc() def test_bit_operations_complex(): now = datetime.utcnow() tom = now + timedelta(days=1) mark_event('task1', 111, now=now) mark_event('task1', 111, now=tom) mark_event('task2', 111, now=now) mark_event('task2', 111, now=tom) mark_event('task1', 222, now=now) mark_event('task1', 222, now=tom) mark_event('task2', 222, now=now) mark_event('task2', 222, now=tom) now_events = BitOpAnd( DayEvents('task1', now.year, now.month, now.day), DayEvents('task2', now.year, now.month, now.day) ) tom_events = BitOpAnd( DayEvents('task1', tom.year, tom.month, tom.day), DayEvents('task2', tom.year, tom.month, 
tom.day) ) both_events = BitOpAnd(now_events, tom_events) print list(both_events) def test_bitop_key_sharing(): #測試key共享 today = datetime.utcnow() #假設task1爲唱歌 task2爲跳舞 mark_event('task1', 111, now=today) mark_event('task2', 111, now=today) mark_event('task1', 222, now=today) mark_event('task2', 222, now=today) ev1_task1 = DayEvents('task1', today.year, today.month, today.day) ev1_task2 = DayEvents('task2', today.year, today.month, today.day) ev1_both = BitOpAnd(ev1_task1, ev1_task2)#今天即唱歌又跳舞的 ev2_task1 = DayEvents('task1', today.year, today.month, today.day) ev2_task2 = DayEvents('task2', today.year, today.month, today.day) ev2_both = BitOpAnd(ev2_task1, ev2_task2) print ev1_both,ev2_both print ev1_both.redis_key,ev2_both.redis_key print len(ev1_both), len(ev2_both)#2,2 ev1_both.delete() print len(ev1_both), len(ev2_both)#0,0 同一個查詢的實例相同 def test_bit_operations_magic(): delete_all_events() mark_event('foo', 1) mark_event('foo', 2) mark_event('bar', 2) mark_event('bar', 3) foo = DayEvents('foo') bar = DayEvents('bar') print list(foo & bar)#交集 print list(foo | bar)#並集 print list(foo ^ bar)#異或 (foo和bar不相同的) print list(~foo & bar) #[2] #[1, 2, 3] #[1, 3] #[3] def test_cohort(): '''測試場景:當天註冊用戶 三天內天天有多少活躍過''' today = datetime.utcnow()#20160608 yes = today - timedelta(days=1) ago2 = today - timedelta(days=2)#20160606 tomorrow = today +timedelta(days=1) after2 = today +timedelta(days=2) ####################################前天註冊用戶追蹤 #前天註冊 mark_event("regist",111,now=ago2) mark_event("regist",112,now=ago2) mark_event("regist",113,now=ago2) mark_event("regist",114,now=ago2) mark_event("regist",115,now=ago2) #前天活躍 mark_event("active",111,now=ago2) mark_event("active",112,now=ago2) mark_event("active",113,now=ago2) mark_event("active",114,now=ago2) #昨天活躍的 mark_event("active",111,now=yes) mark_event("active",112,now=yes) mark_event("active",113,now=yes) #今天活躍的 mark_event("active",115,now=today) ############################昨天註冊用戶追蹤 #昨天註冊 mark_event("regist",116,now=yes) 
mark_event("regist",117,now=yes) mark_event("regist",118,now=yes) mark_event("regist",119,now=yes) mark_event("regist",120,now=yes) #昨天活躍 mark_event("active",116,now=yes) mark_event("active",117,now=yes) mark_event("active",118,now=yes) mark_event("active",119,now=yes) #今天活躍 mark_event("active",119,now=today) mark_event("active",120,now=today) #明天活躍 mark_event("active",116,now=tomorrow) ########################################################今天註冊用戶追蹤 #今天註冊 mark_event("regist",121,now=today) mark_event("regist",122,now=today) mark_event("regist",123,now=today) #今天活躍 mark_event("active",121,now=today) mark_event("active",122,now=today) #明天活躍 mark_event("active",121,now=tomorrow) #後天活躍 mark_event("active",123,now=after2) for select1,select1b,select2,select2b in [('regist', None, 'active', None)]: '''select1:初始條件 本例指註冊 select1b:select1的附屬條件 例如中國註冊用戶(mark_event("registchina",123,now=today)) select2:初始條件下的過濾 select2b:同select1b time_group:時間跨度 `days`, `weeks`, `months`, `years` as_precent:0輸出通過select過濾後的數量 1百分比 num_results:time_group+num_results 獲得關注的時間範圍 如近三天、三週、三月 三年 num_of_rows:對select1(或select1+select1b)條件下的select2(或select2+select2b) 輸出將來幾天(周/月/年)的 本例:近三天(time_group+num_results=3days)當天註冊用戶(select1='regist') 三天內(num_of_rows=3)天天的活躍(select2='active')人數 ''' r = get_dates_data(select1=select1, select1b=select1b, select2=select2, select2b=select2b, time_group='days', as_precent=0, num_results=3, num_of_rows=3) print list(r) #當天註冊 #當天活躍 #將來三天活躍 # [[datetime.datetime(2016, 6, 6, 6, 21, 43, 845437), 5, 4, 3, 1, 0.0], # [datetime.datetime(2016, 6, 7, 6, 21, 43, 845437), 5, 4, 2, 1, 0.0], # [datetime.datetime(2016, 6, 8, 6, 21, 43, 845437), 3, 2, 1, 1, '']] #以6號爲例 20160606 當天註冊5人 4人活躍 將來三天20160607 在20160606註冊的人中其中3人活躍 20160608 1人活躍 20160609 0人活躍(0.0表明有人活躍但不在20160606的註冊人中 '' 表明壓根沒人活躍) #redis中查找key: #:6379> BITCOUNT trackist_regist_2016-6-7 #6379> BITCOUNT trackist_regist_W2016-27 #6379> BITCOUNT trackist_buyid_regist_2016-6 if __name__ == '__main__': test_bitop_key_sharing() 
delete_all_events() test_bit_operations_magic() delete_all_events() test_cohort()