# pc/analyze.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80

import re
from collections import defaultdict

class Dumps:
    """Collection of tagged hexdumps parsed from a text file.

    Every stored entry is a tuple ``(no, tag, dump)``:

    * ``no``   -- running index of the dump, counted from 0 within one
      ``load`` call,
    * ``tag``  -- the last non-blank, non-hexdump line seen before the
      dump (without its newline), or ``None`` if there was none,
    * ``dump`` -- list of byte values; consecutive hexdump lines are
      concatenated into one dump.
    """

    # A hexdump line is exactly 26 space-separated two-digit uppercase hex
    # bytes, optionally followed by spaces.  Only real hex digits are
    # accepted so that every matched token can be converted by int(s, 16).
    _HEXLINE = re.compile(r"[0-9A-F]{2}( [0-9A-F]{2}){25} *$")

    def __init__(self):
        self.dumps = []

    def load(self, f):
        """Parse the text file at path ``f`` and append its dumps.

        Lines matching the hexdump pattern extend the current dump.  Any
        other line ends the current dump (if one is in progress); if that
        line is non-blank it becomes the tag for the next dump.  A dump
        still in progress at end of file is stored as well.
        """
        dump = []
        tag = None
        no = 0
        with open(f, "r") as handle:
            for line in handle:
                if self._HEXLINE.match(line):
                    dump += [int(s, 16) for s in line.split()]
                    continue

                # Non-hexdump line: close the dump collected so far.
                if dump:
                    self.dumps.append((no, tag, dump))
                    dump = []
                    tag = None
                    no += 1

                if line.strip() != "":
                    if tag is not None:
                        print("overriding tag %s with %s" % (tag, line))
                    tag = line.replace("\n", "")
                # blank lines are ignored

        # The file may end directly after a hexdump line; without this the
        # last dump would be dropped.
        if dump:
            self.dumps.append((no, tag, dump))

    def show(self, xcols, xrows):
        """Print every non-excluded dump as hex bytes followed by its tag.

        ``xcols`` / ``xrows`` are the byte positions / dump indices to
        leave out; ``None`` means "exclude nothing".
        """
        for _no, tag, dump in self.filter(xcols, xrows):
            hexbytes = "".join("%02X " % byte for byte in dump)
            print("%s%s" % (hexbytes, tag))

    def filter(self, xcols, xrows):
        """Return the stored dumps without the excluded columns and rows.

        ``xcols`` lists byte positions to drop from each dump, ``xrows``
        lists dump indices (the ``no`` field) to drop entirely.  ``None``
        is treated like an empty collection.  The result is a new list of
        ``(no, tag, bytes)`` tuples; ``self.dumps`` is not modified.
        """
        # Sets make the per-byte membership test constant time.
        skip_cols = set(xcols) if xcols else set()
        skip_rows = set(xrows) if xrows else set()
        return [
            (no, tag, [byte for pos, byte in enumerate(dump) if pos not in skip_cols])
            for no, tag, dump in self.dumps
            if no not in skip_rows
        ]

def mirror(dumps):
    """Transpose dump entries into per-column byte lists.

    ``dumps`` is a sequence of ``(no, tag, bytes)`` tuples.  The result has
    one list per byte position of the first dump; list ``j`` holds byte
    ``j`` of every dump, in dump order.  An empty ``dumps`` yields ``[]``.

    Raises IndexError if a later dump is shorter than the first one; bytes
    beyond the first dump's length are ignored.
    """
    if not dumps:
        # Nothing loaded: there are no columns to build.
        return []
    width = len(dumps[0][2])
    return [[entry[2][j] for entry in dumps] for j in range(width)]

def stats(cols):
    """Build, per column, a histogram of successive byte differences.

    For every column the difference ``(next - current) % 256`` of each
    adjacent pair is counted, weighted by ``1 / (len(column) - 1)``.
    Returns one ``defaultdict`` (default 0) per column, mapping the
    difference to its accumulated weight.
    """
    histograms = []
    for column in cols:
        histogram = defaultdict(lambda: 0)
        for current, following in zip(column, column[1:]):
            delta = (following - current) % 256
            # The weight is computed inside the loop on purpose: for a
            # one-element column the loop body never runs, so no division
            # by zero can occur.
            histogram[delta] += 1. / (len(column) - 1)
        histograms.append(histogram)
    return histograms

def getconstcols(stats):
    """Return the indices of columns whose difference 0 dominates.

    ``stats`` is a list of per-column histograms; a column is reported
    when its entry for difference 0 is greater than 0.9999.
    """
    constant = []
    for index, histogram in enumerate(stats):
        if histogram[0] > 0.9999:
            constant.append(index)
    return constant

def showstats(stats):
    """Print each column histogram as ``<index>:<TAB><dict>`` on one line."""
    for index, histogram in enumerate(stats):
        rendered = str(dict(histogram))
        print("%i:\t%s" % (index, rendered))

def prettydict(d):
    """Print one histogram on a single line, largest weight first.

    Every entry is shown as ``<key as 2-digit hex>: <weight in percent> | ``.
    """
    def negated_weight(item):
        # Sorting by the negated value gives descending order.
        return -item[1]

    cells = []
    for key, weight in sorted(d.items(), key=negated_weight):
        cells.append("%02X: %2.0f | " % (key, 100 * weight))
    print("".join(cells))

# Load all dumps from the capture file.
d = Dumps()
d.load("mydump.txt")

# Find the columns that never change between successive dumps and print
# the dumps without them.
all_columns = mirror(d.filter([], []))
cc = getconstcols(stats(all_columns))
d.show(cc, [])

# Print the difference histogram of every remaining (varying) column.
varying_columns = mirror(d.filter(cc, []))
for i in stats(varying_columns):
    prettydict(i)