Google


Thursday, January 13, 2011

Groupon

Everyone is talking about Groupon, so I wanted to check how fast they are growing. The chart below shows global visitors: Groupon has overtaken Yelp but is closely followed by Living Social. It looks as though Living Social could actually overtake Groupon. I am not sure whether the dip at the end is due to January being incomplete.

How to hack Google charts

I ran a comparison on Google Trends and this is the location of the chart png. As you can see it has lots of colours in there and labels.

http://chart.apis.google.com/chart?cht=lc&chd=e:2Y2Y2Y2Y2Y2Y2Y2Y,B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6,nBnB,RRRR,________________________________AYBDBUBMBNBbByCJCzCcDlDrETFOF1GyGjHSHoJXKmLrLnMAMmMqOZQiUkTcSzUKVZbKaqe3nek4hK,AYAYAYAYAYAYAYAYAYAYAYAYAYAYAYAY______________________________________________________________________________,AVATARAKAcATASBeDzBIBSBBAkAdAiAyA2A2A.BTA6A4A1BGAwA8A2BHB7BkCiCnCcDDDiE5FZITIpHqGHHkIQHnJwJ1I5PDMWL8O3PNSyYecW,,MUMgN7OSO2QFPjQYQwQeQHRXRrSYTRTtUxVvVkV8T8UmU0UsVGSxRWSITyXFWwXgXMXmV8XKYvY7XsYCYnYQbSbSa6axa1Z3Z4dXYiUKWsXSYS,&chds=0.0,2100000.0&chs=580x188&chco=ffffff00,ffffff00,ffffff00,ffffff00,4684eeff,4684eeff,dc3912ff,4684eeff,ff9900ff,4684eeff&chls=1.0,1.0,0.0%7C1.0,1.0,0.0%7C1.0,1.0,0.0%7C1.0,1.0,0.0%7C1.75,1.0,0.0%7C1.5,3.0,3.0%7C1.75,1.0,0.0%7C1.5,3.0,3.0%7C1.75,1.0,0.0%7C1.5,3.0,3.0&chxt=x&chxr=0,0.0,100.0&chxl=0:%7C%7CJan+2009%7C%7C%7CApr+2009%7C%7C%7CJul+2009%7C%7C%7COct+2009%7C%7C%7CJan+2010%7C%7C%7CApr+2010%7C%7C%7CJul+2010%7C%7C%7COct+2010%7C%7C%7C&chxs=0,443322ff,9.0,0.0&chm=v,443322ff,1,-1,1%7Ct+Daily+Unique+Visitors,676767ff,0,0,10,1%7Ct+Google+Trends,676767ff,0,6,10,1%7Ct+1.4+M,676767ff,2,0,10,1%7Ct+700+K,676767ff,3,0,10,1&chg=12.0,33.33,1.0,1.0,4.0

For instance I could change the 'Google Trends' in the top right to my name. I tried changing the tick at 700 to 600 but that changes just the label not the data. You could try to increase the dimensions from 580x188 to something bigger.

http://chart.apis.google.com/chart?cht=lc&chd=e:2Y2Y2Y2Y2Y2Y2Y2Y,B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6B6,nBnB,RRRR,________________________________AYBDBUBMBNBbByCJCzCcDlDrETFOF1GyGjHSHoJXKmLrLnMAMmMqOZQiUkTcSzUKVZbKaqe3nek4hK,AYAYAYAYAYAYAYAYAYAYAYAYAYAYAYAY______________________________________________________________________________,AVATARAKAcATASBeDzBIBSBBAkAdAiAyA2A2A.BTA6A4A1BGAwA8A2BHB7BkCiCnCcDDDiE5FZITIpHqGHHkIQHnJwJ1I5PDMWL8O3PNSyYecW,,MUMgN7OSO2QFPjQYQwQeQHRXRrSYTRTtUxVvVkV8T8UmU0UsVGSxRWSITyXFWwXgXMXmV8XKYvY7XsYCYnYQbSbSa6axa1Z3Z4dXYiUKWsXSYS,&chds=0.0,2100000.0&chs=580x188&chco=ffffff00,ffffff00,ffffff00,ffffff00,4684eeff,4684eeff,dc3912ff,4684eeff,ff9900ff,4684eeff&chls=1.0,1.0,0.0|1.0,1.0,0.0|1.0,1.0,0.0|1.0,1.0,0.0|1.75,1.0,0.0|1.5,3.0,3.0|1.75,1.0,0.0|1.5,3.0,3.0|1.75,1.0,0.0|1.5,3.0,3.0&chxt=x&chxr=0,0.0,100.0&chxl=0:||Jan+2009|||Apr+2009|||Jul+2009|||Oct+2009|||Jan+2010|||Apr+2010|||Jul+2010|||Oct+2010|||&chxs=0,443322ff,9.0,0.0&chm=v,443322ff,1,-1,1|t+Daily+Unique+Visitors,676767ff,0,0,10,1|t+Dirk+nachbar,676767ff,0,6,10,1|t+1.4+M,676767ff,2,0,10,1|t+700+K,676767ff,3,0,10,1&chg=12.0,33.33,1.0,1.0,4.0

Wednesday, January 12, 2011

Kaggle social network challenge - test/train code

For those who participated in the Kaggle social network challenge, here is the Python code to split the full downloaded graph into test and training sets.

#create random sorted train set and test set with equal amounts of true and false edges

import random

#Split the complete edge list into a training graph plus a labelled test set.
#Half of the sampled source nodes each give up one real edge (label 1), the
#other half each get one made-up edge that is not in the graph (label 0).
#Runs unchanged on Python 2.7 and Python 3.

samp=9000 #number of source nodes to sample (split in two halves below)

#import complete file: one "source,target" edge per line
prim=[] #every edge as [source,target,random sort key]
prim_connections={} #source node -> number of outbound edges
sec_connections={} #target node -> number of inbound edges
f1=open('complete4.txt','r')
try:
    for line in f1:
        if not line.strip(): #ignore blank lines instead of crashing on them
            continue
        fields=line.split(',')
        a=fields[0]
        b=fields[1].strip()
        prim.append([a,b,random.random()]) #need rand for later
        prim_connections[a]=prim_connections.get(a,0)+1
        sec_connections[b]=sec_connections.get(b,0)+1
finally:
    f1.close()

print('%d %d %d' % (len(prim),len(prim_connections),len(sec_connections)))

#universe of those with 2+ connections
#(sec_universe is fixed here, before inbound counts are decremented below)
prim_universe=set(p for p in prim_connections if prim_connections[p]>1)
sec_universe=set(p for p in sec_connections if sec_connections[p]>1)
prim_2plus=len(prim_universe)
sec_2plus=len(sec_universe)

print('%d %d' % (prim_2plus,sec_2plus))

#choose 2 disjoint sets of samp/2 source nodes each
#random.sample no longer accepts a set on Python 3.11+, so sample from a sorted list
#raises ValueError if fewer than samp nodes have 2+ outbound edges
sample=random.sample(sorted(prim_universe),samp)
sample1=set(random.sample(sample,samp//2)) #will lose one real edge each
sample2=set(i for i in sample if i not in sample1) #will get one fake edge each

print('%d %d %d' % (len(sample),len(sample1),len(sample2)))

#sort by random so the edge removed for each node is a random one
prim.sort(key=lambda edge:edge[2])

#closes all three output files even if something below fails
with open('simplesplit_test.txt','w') as f2, \
     open('simplesplit_validate.txt','w') as f3, \
     open('simplesplit_train.txt','w') as f4:

    sample1_done=set() #sample1 nodes that already gave up an edge
    sample2_targets={} #sample2 node -> set of its real targets, for the non-edge check
    sample2_edges=0
    for edge in prim:
        a=edge[0]
        b=edge[1]
        #not done and inbound has other edge
        if a in sample1 and a not in sample1_done and (sec_connections[b]>1 or b in prim_connections):
            sec_connections[b]-=1
            f2.write(a+','+b+'\n') #test
            f3.write(a+','+b+',1\n') #validate
            sample1_done.add(a) #is done
            print(len(sample1_done))
        else:
            f4.write(a+','+b+'\n') #train
            if a in sample2: #remember real targets to speed up non pairs check
                sample2_targets.setdefault(a,set()).add(b)
                sample2_edges+=1

    del prim

    print(sample2_edges)

    #for sample2 chose non connections
    #NOTE(review): the loop bound was lost in the published source (the line read
    #just "if count"); len(sample1_done) is assumed so that the number of false
    #edges equals the number of true edges - confirm against the original script
    sec_pool=sorted(sec_universe) #list built once so random.choice is cheap
    count=0
    for i in sample2:
        if count>=len(sample1_done):
            break
        known=sample2_targets.get(i,())
        #retry until the target is neither a real neighbour nor the node itself
        #(would loop forever if every candidate were excluded)
        while True:
            rand=random.choice(sec_pool)
            if rand not in known and rand!=i:
                break
        count+=1
        print(count)
        f2.write(i+','+rand+'\n') #test
        f3.write(i+','+rand+',0\n') #validate

Tuesday, January 11, 2011

Android lock

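I have tried to determine the number of possible combinations on a 3x3 Android pattern lock. Counting patterns of 1 to 9 points in which every move goes to an adjacent point (straight or diagonal) that has not been visited yet, I come up with 10,305 combinations, which is slightly more than the 10,000 combinations you would get with a 4-digit PIN. Let me know if you find any errors.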

#count how many patterns there are on a 3x3 lock
#every point is visited at most once per pattern
#each move goes to an adjacent point, straight or diagonal
#path length 1 to 9 points

#length 1 is trivial: 9 possibilities

#done maps a running index to a path; a path is a list of [column,row] points
done=dict()
for c in range(1,4):
    for r in range(1,4):
        poss=len(done) #next free index
        done[poss]=[[c,r]]

poss=len(done)
print(poss) #parenthesised so it prints the same on Python 2 and Python 3

def posspath(curpath): #returns all possible paths from a curpath
    """Return every one-step extension of curpath as a list of new paths.

    curpath is a non-empty list of [column,row] points with coordinates 1..3.
    Each returned path is curpath plus one neighbouring point (straight or
    diagonal) that is on the grid and not already in curpath. curpath itself
    is left unmodified.
    """
    #the path can only grow from its current end point
    tail=curpath[-1]
    col=tail[0]
    row=tail[1]
    #can go 8 different ways
    steps=([0,1],[1,0],[1,1],[0,-1],[-1,0],[1,-1],[-1,1],[-1,-1])
    extended=[]
    for step in steps:
        target=[col+step[0],row+step[1]]
        #skip points outside the 3x3 grid
        if not (1<=target[0]<=3 and 1<=target[1]<=3):
            continue
        #skip points already visited
        if target in curpath:
            continue
        extended.append(curpath+[target])
    return extended
   
#count the paths of length 2..9: for each target length and each start point,
#keep extending partial paths until they reach the target length or dead-end
for path in range(2,10):
    for c in range(1,4):
        for r in range(1,4):
            #work list of partial paths that start at [c,r]
            nextpath=posspath([[c,r]])
            i=0
            while i<len(nextpath): #the list grows while it is being scanned
                if len(nextpath[i])==path:
                    #no duplicate check needed: every path is generated exactly once,
                    #because it has a single start point and a single parent path
                    done[len(done)]=nextpath[i]
                    i+=1
                else:
                    #explore possible and remove original; i stays put because the
                    #next candidate has shifted into position i
                    #(a dead end just disappears: posspath returns an empty list)
                    shorter=nextpath.pop(i)
                    nextpath+=posspath(shorter)

poss=len(done)
print(poss) #parenthesised so it prints the same on Python 2 and Python 3