Web maximum-likely.blogspot.com

## Thursday, January 13, 2011

### Groupon

Everyone is talking about Groupon, so I wanted to check how fast it is growing. The chart below shows global visitors: Groupon has overtaken Yelp but is closely followed by Living Social. It looks as though Living Social could actually overtake Groupon. I am not sure whether the dip is due to the incomplete January data.

## Wednesday, January 12, 2011

### Kaggle social network challenge - test/train code

For those who participated in the Kaggle social network challenge, here is the Python code to split the full downloaded graph into test and training sets.

#create random sorted train set and test set with equal amounts of true and false edges
#
#Input:  complete4.txt, one edge per line as "source,target".
#Output: simplesplit_test.txt      held-out true edges plus sampled non-edges, "source,target"
#        simplesplit_validate.txt  the same pairs with a trailing label: 1 = true edge, 0 = non-edge
#        simplesplit_train.txt     every edge that was not held out
#
#NOTE(review): the published listing lost its indentation, its [0]/[1]/[2]
#subscripts, its set .add() lines and the tail of one "if" condition.
#Those parts are reconstructed here from the surrounding comments and variable
#names - confirm against the author's original script.

import random

samp=9000 #number of source nodes sampled: half yield true test edges, half yield false ones

prim=[] #every edge as [source,target,random sort key]
prim_connections={} #source -> number of outbound edges
sec_connections={} #target -> number of inbound edges

#import complete file
with open('complete4.txt','r') as f1:
    for line in f1:
        if not line.strip(): #tolerate blank lines such as a trailing newline
            continue
        fields=line.split(',')
        a=fields[0]
        b=fields[1].strip()
        prim.append([a,b,random.random()]) #need rand for later
        prim_connections[a]=prim_connections.get(a,0)+1
        sec_connections[b]=sec_connections.get(b,0)+1

print('%d %d %d' % (len(prim),len(prim_connections),len(sec_connections)))

#universe of those with 2+ connections
prim_universe=set(p for p in prim_connections if prim_connections[p]>1)
sec_universe=set(p for p in sec_connections if sec_connections[p]>1)
prim_2plus=len(prim_universe)
sec_2plus=len(sec_universe)

print('%d %d' % (prim_2plus,sec_2plus))

#chose 2 disjoint sets of sources: sample1 for true edges, sample2 for false edges
#random.sample needs a sequence, hence sorted(); it raises ValueError when
#fewer than samp sources have 2+ outbound edges
sample=random.sample(sorted(prim_universe),samp)
sample1=set(random.sample(sample,samp//2))
sample2=set(i for i in sample if i not in sample1)

print('%d %d %d' % (len(sample),len(sample1),len(sample2)))

#sort by random
prim2=sorted(prim,key=lambda edge:edge[2])

del prim

prim3=[] #edges whose source is in sample2, kept to speed up the non pairs check
sample1_done=set() #sources that already gave up one edge to the test set

with open('simplesplit_test.txt','w') as f2, open('simplesplit_validate.txt','w') as f3:
    with open('simplesplit_train.txt','w') as f4:
        for a,b,_ in prim2:
            #hold out at most one edge per sample1 source, and only when
            #the inbound node has another edge
            if a in sample1 and a not in sample1_done and (sec_connections[b]>1 or b in prim_connections):
                sec_connections[b]-=1
                sample1_done.add(a)
                f2.write(a+','+b+'\n') #test
                f3.write(a+','+b+',1\n') #validate
                print(len(sample1_done))
            else:
                f4.write(a+','+b+'\n') #train
                if a in sample2: #create a subset of prim to speed up non pairs check
                    prim3.append([a,b])

    del prim2

    print(len(prim3))

    #for sample2 chose non connections
    sec_candidates=sorted(sec_universe) #built once so each draw is O(1)
    count=0
    for i in sample2:
        #NOTE(review): the original condition was truncated after "if count";
        #samp//2 (one false edge per true edge) is assumed - confirm
        if count>=samp//2:
            break
        prim4=set(j[1] for j in prim3 if j[0]==i) #targets i already points to
        #rejection sampling: redraw until the target is neither an existing
        #neighbour of i nor i itself
        while True:
            rand=random.choice(sec_candidates)
            if rand not in prim4 and rand!=i:
                break
        count+=1
        print(count)
        f2.write(i+','+rand+'\n') #test
        f3.write(i+','+rand+',0\n') #validate

## Tuesday, January 11, 2011

### Android lock

I have tried to determine the number of possible combinations on a 3x3 Android pattern lock. I came up with 10,305 combinations, which is slightly more than the 10,000 combinations you would get with a 4-digit number lock. Let me know if you find any errors.

#count how many patterns there are on 3x3 lock
#every point visited once
#can move straight and diagonal
#path length 1 to 9
#
#Points are [column,row] pairs with both coordinates in 1..3. A path is a
#list of points; "done" maps a running index to every path found so far.

def posspath(curpath): #returns all possible paths from a curpath
    """Return every path that extends curpath by exactly one point.

    curpath is a non-empty list of [column,row] points. The new point must
    be one of the up-to-8 neighbours (straight or diagonal) of the last
    point, lie inside the 3x3 grid and not already be on the path.
    curpath itself is not modified; each result is a new list.
    """
    outpath=[]
    c,r=curpath[-1]
    #can go 8 different ways
    for dc in (-1,0,1):
        for dr in (-1,0,1):
            point=[c+dc,r+dr]
            #if within bounds and not visited
            #(the zero offset is the current point, which is already visited)
            if 1<=point[0]<=3 and 1<=point[1]<=3 and point not in curpath:
                outpath.append(curpath+[point])
    return outpath

#length 1 is trivial: 9 possibilities
done=dict()
current=[] #all paths of the length handled most recently
for c in range(1,4):
    for r in range(1,4):
        poss=len(done)
        done[poss]=[[c,r]]
        current.append(done[poss])

poss=len(done)
print(poss)

#grow every path one point at a time; each path has exactly one parent, so
#no duplicate check is needed and nothing is re-explored per length
for path in range(2,10):
    current=[longer for shorter in current for longer in posspath(shorter)]
    for found in current:
        poss=len(done)
        done[poss]=found

poss=len(done)
print(poss)