[long code post warning] Studying some concepts related to Google: Sorts, Searches, Hash-Tables, & Map-Reduce

The code is just a basic, rough implementation of this stuff in case anyone is interested in studying the theory of how they work below (no comments or good variable names included, sorry…). It covers a: Merge sort, Quick sort, Radix (LSD) sort, Binary search, Linked-List hash table, & Pipe/Fork-Word-Counting map reduce (in Python & C).

Running sorts.py

('key-hash', 'jonnjonn', '->', '0x8bb05860')
('key-value:', 'jonnjonn', '=', {'home': '/home/jonnjonn', 'gid': 4, 'uid': 4, 'name': 'Jon Chipps'})
('key-value:', 'jonnjonn', '=', {'home': '/home/_jonnjonn_', 'gid': 4, 'uid': 4, 'name': 'Jon Chipps'})

('key-hash', 'jonnjono', '->', '0xf4c7560')
('key-value:', 'jonnjono', '=', {'home': '/home/jonnjono', 'gid': 5, 'uid': 5, 'name': 'Jon Chipps'})
('key-value:', 'jonnjono', '=', {'home': '/home/_jonnjono_', 'gid': 5, 'uid': 5, 'name': 'Jon Chipps'})

('key-hash', 'jonxjonp', '->', '0x2494c160')
('key-value:', 'jonxjonp', '=', {'home': '/home/jonxjonp', 'gid': 6, 'uid': 6, 'name': 'Jon Chipps'})
('key-value:', 'jonxjonp', '=', {'home': '/home/_jonxjonp_', 'gid': 6, 'uid': 6, 'name': 'Jon Chipps'})

('key-hash', 'jonpjonx', '->', '0xe346c4e0')
('key-value:', 'jonpjonx', '=', {'home': '/home/jonpjonx', 'gid': 7, 'uid': 7, 'name': 'Jon Chipps'})
('key-value:', 'jonpjonx', '=', {'home': '/home/_jonpjonx_', 'gid': 7, 'uid': 7, 'name': 'Jon Chipps'})

('map-reduce', 'search', ['better', 8])

Sorting...
('merge_r:', 262145, 'items', 'took', 1.4660530090332031, 'seconds', '385a7adb')
('quick_r:', 262145, 'items', 'took', 1.087796926498413, 'seconds', '385a7adb')
('radixlsd_f:', 262145, 'items', 'took', 1.1461758613586426, 'seconds', '385a7adb')
Searching...
('bin_search_r:', True, 'index', 1368, '=', 51966, 'took', 2.5033950805664062e-05, 'seconds')

Running ./sorts 2> /dev/null

key-hash:jonnjonn=0x8bb05860
look:22624/-1/jonnjonn:(nil)/(nil)/(nil)
look:22624/0/jonnjonn:0x20c2060/0x20c2010/(nil)
jonnjonn={uid:jonnjonn, name:Jon Chipps, uid:1, gid:1, home:/home/jonnjonn}
look:22624/0/jonnjonn:0x20c2060/0x20c2010/(nil)
look:22624/0/jonnjonn:0x20c2060/0x20c2080/(nil)
jonnjonn={uid:jonnjonn, name:Jon Chipps, uid:1, gid:1, home:/home/_jonnjonn_}

key-hash:jonnjono=0xf4c7560
look:30048/-1/jonnjono:(nil)/(nil)/(nil)
look:30048/0/jonnjono:0x20c20d0/0x20c2010/(nil)
jonnjono={uid:jonnjono, name:Jon Chipps, uid:2, gid:2, home:/home/jonnjono}
look:30048/0/jonnjono:0x20c20d0/0x20c2010/(nil)
look:30048/0/jonnjono:0x20c20d0/0x20c20f0/(nil)
jonnjono={uid:jonnjono, name:Jon Chipps, uid:2, gid:2, home:/home/_jonnjono_}

key-hash:jonxjonp=0x2494c160
look:49504/-1/jonxjonp:(nil)/(nil)/(nil)
look:49504/0/jonxjonp:0x20c2140/0x20c2010/(nil)
jonxjonp={uid:jonxjonp, name:Jon Chipps, uid:3, gid:3, home:/home/jonxjonp}
look:49504/0/jonxjonp:0x20c2140/0x20c2010/(nil)
look:49504/0/jonxjonp:0x20c2140/0x20c2160/(nil)
jonxjonp={uid:jonxjonp, name:Jon Chipps, uid:3, gid:3, home:/home/_jonxjonp_}

key-hash:jonpjonx=0xe346c4e0
look:50400/-1/jonpjonx:(nil)/(nil)/(nil)
look:50400/0/jonpjonx:0x20c21b0/0x20c2010/(nil)
jonpjonx={uid:jonpjonx, name:Jon Chipps, uid:4, gid:4, home:/home/jonpjonx}
look:50400/0/jonpjonx:0x20c21b0/0x20c2010/(nil)
look:50400/0/jonpjonx:0x20c21b0/0x20c21d0/(nil)
jonpjonx={uid:jonpjonx, name:Jon Chipps, uid:4, gid:4, home:/home/_jonpjonx_}

merge-r:70/ms
quick-r:40/ms
radixlsd-r:30/ms
binsearch-r[13455]=51966

mapreduce-search('better'):8

–code–

sorts.py

#!/usr/bin/python

import hashlib
import os
import random
import re
import select
import sys
import time

def merge_r(list):
	m = len(list)
	if (m < 2):
		return list
	q = (m / 2)
	l = list[0:q]
	r = list[q:m]
	n = (m - q)
	# recursive call on half-split lists
	l = merge_r(l)
	r = merge_r(r)
	# merge sort core loop
	i = 0; li = 0; ri = 0
	while (i < m):
		if ((li < q) and (ri < n)):
			if (l[li] < r[ri]):
				list[i] = l[li]; i += 1; li += 1
			else:
				list[i] = r[ri]; i += 1; ri += 1
		else:
			if (li >= q):
				while (ri < n):
					list[i] = r[ri]; i += 1; ri += 1
			if (ri >= n):
				while (li < q):
					list[i] = l[li]; i += 1; li += 1
	return list

def quick_r(list):
	llen = len(list)
	if (llen < 2):
		return list
	lptr = 0; pivp = (llen / 2); rptr = (llen - 1)
	pivv = list[pivp]
	# search for any left & right values to swap around our pivot point
	while (1):
		if (lptr > rptr):
			break
		# ignore any left & right values which are already in place
		while (list[lptr] < pivv):
			lptr += 1
		while (list[rptr] > pivv):
			rptr -= 1
		# perform a left & right swap if the left index is less than the right index
		if (lptr <= rptr):
			temp = list[lptr]; list[lptr] = list[rptr]; list[rptr] = temp
			lptr += 1; rptr -= 1
	# note: at this point the left & right pointers should have crossed each other
	# recursive call on the left-half list
	l = quick_r(list[0:rptr+1])
	# save any middle items that are not being processed any further
	m = []
	if ((lptr - rptr) > 1):
		m = list[rptr+1:lptr]
	# recursive call on the right-half list
	r = quick_r(list[lptr:llen])
	return (l + m + r)

def radixlsd_f(list):
	llen = len(list)
	if (llen < 2):
		return list
	i = 0; m = 1
	while (i < m):
		# create buckets for decimal digits 0-9
		buckets = (10*[None])
		x = 0
		while (x < llen):
			# find the longest decimal integer during the first round
			if (i == 0):
				temp = abs(list[x]); d = 1
				while (temp > 0):
					temp = (temp / 10)
					d += 1
				m = max(m, d)
			# place the integer in its related bucket for this round
			b = ((list[x] / (10 ** i)) % 10)
			if (buckets[b] == None):
				buckets[b] = []
			buckets[b].append(list[x])
			x += 1
		# assign the bucket lists back into the main list to be sorted again
		blist = []
		for k in range(0, 10):
			if (buckets[k] != None):
				blist.extend(buckets[k])
		#print(list,buckets[0:10],blist)
		list = blist
		i += 1
	return list

def bin_search_r(list, item, lo=None, hi=None):
	if ((lo == None) or (hi == None)):
		lo = 0
		hi = len(list)
	mid = ((hi - lo) / 2)
	#print(lo,mid,hi,list[lo+mid])
	if (list[lo + mid] == item):
		return (lo + mid)
	if ((hi - lo) < 1):
		return -1
	if (list[lo + mid] < item):
		return bin_search_r(list, item, lo=lo+mid+1, hi=hi)
	return bin_search_r(list, item, lo=lo, hi=hi-mid-1)

class HashTable:
	def __init__(self):
		self.size = (2**16)
		self.data = ([[]]*self.size)
	
	def hash(self, data):
		i = 0; l = len(data); j = 0
		p = 0; z = l; h = 0
		while ((j < l) or (j < 256)):
			c = ord(data[i])
			s = (24 - ((j % 4) * 8))
			p = (p | (c << s))
			if (s == 0):
				#print("hash",hex(h),"^",hex(p),"^",hex(z))
				h = (h ^ (p ^ z))
				p = 0
			z = (((z+(c+j)) + (z^(c^j)) + (z*(c*j)) + (z&(c&j))) & 0xffffffff)
			i = ((i + 1) % l)
			j = (j + 1)
		if (p != 0):
			#print("hash",hex(h),"^",hex(p),"^",hex(z))
			h = (h ^ (p ^ z))
		return h
	
	def look(self, k):
		i = (self.hash(k) % self.size)
		j = -1
		for e in self.data[i]:
			if (e[0] == k):
				j = ((j + 1) * -1)
				break
			j -= 1
		return [i, j]
	
	def g(self, k):
		k = str(k)
		z = self.look(k)
		i = z[0]; j = z[1]
		#print("get",i,j)
		try:
			return self.data[i][j][1]
		except:
			return None
	
	def s(self, k, v):
		k = str(k)
		z = self.look(k)
		i = z[0]; j = z[1]
		#print("set",i,j)
		if (j < 0):
			self.data[i].append([k, v])
		else:
			self.data[i][j][1] = v

class MapReduce:
	def __init__(self):
		self.pids = []
	
	def map(self):
		self.pids = []
		self.data = []
		for x in range(0, 3):
			(r, w) = os.pipe()
			p = os.fork()
			if (p == 0):
				os.close(r)
				w = os.fdopen(w, "w")
				f = open("/tmp/textfile%d.txt" % (x), "r")
				d = f.read().lower().replace("\r", " ").replace("\n", " ")
				d = re.sub("[^a-z]+", " ", d)
				d = re.sub(" [ ]+", " ", d)
				l = d.split(" ")
				f.close()
				m = {}
				for n in l:
					if (not n):
						continue
					if (not n in m.keys()):
						m[n] = 0
					m[n] += 1
				g = {}
				for k in m.keys():
					if (not m[k] in g.keys()):
						g[m[k]] = []
					g[m[k]].append(k)
				l = g.keys()
				l = sorted(l)
				for k in l:
					w.write("%d %s\n" % (k, " ".join(g[k])))
				w.close()
				sys.exit(0)
			else:
				os.close(w)
				r = os.fdopen(r, "r")
				self.pids.append([p, r, "", 0])
		while (1):
			inplist = []
			for c in self.pids:
				if (c[3] == 0):
					inplist.append(c[1])
			if (len(inplist) < 1):
				break
			(inpready, outready, errready) = select.select(inplist, [], [])
			for s in inpready:
				for c in self.pids:
					if (c[1] == s):
						d = c[1].read()
						if (not d):
							c[3] = 1
						else:
							c[2] += d
		for c in self.pids:
			os.wait()
		for c in self.pids:
			for l in c[2].split("\n"):
				self.data.append(l)
	
	def reduce(self):
		self.count = {}
		for l in self.data:
			if (l.strip() == ""):
				continue
			m = l.split(" ")
			n = int(m[0])
			for w in m[1:]:
				if (not w in self.count.keys()):
					self.count[w] = 0
				self.count[w] += n
	
	def search(self, word):
		try:
			return [word, self.count[word]]
		except:
			return [word, 0]

def main():
	print("")
	
	ht = HashTable()
	i = 4
	for k in ["jonnjonn", "jonnjono", "jonxjonp", "jonpjonx"]:
		print("key-hash",k,"->",hex(ht.hash(str(k))))
		ht.s(k, {"uid":k,"name":"Jon Chipps","uid":i,"gid":i,"home":"/home/%s"%(k)})
		print("key-value:",k,"=",ht.g(k))
		ht.s(k, {"uid":k,"name":"Jon Chipps","uid":i,"gid":i,"home":"/home/_%s_"%(k)})
		print("key-value:",k,"=",ht.g(k))
		print("")
		i += 1
	
	mr = MapReduce()
	mr.map()
	mr.reduce()
	print("map-reduce","search",mr.search("better"))
	
	print("")
	
	numa = []; numb = []; numc = []
	#sys.stderr.write(str(2**18)+"\n")
	for x in range(0, 2**18):
		r = int(random.random() * (10**7))
		numa.append(r); numb.append(r); numc.append(r)
		#sys.stderr.write(str(r)+"\n")
	numa.append(51966); numb.append(51966); numc.append(51966)
	print("Sorting...")
	a=time.time();m=merge_r(numa);b=time.time();h=hashlib.md5(str(m)).hexdigest();print("merge_r:",len(m),"items","took",b-a,"seconds",h[0:8])
	a=time.time();q=quick_r(numb);b=time.time();i=hashlib.md5(str(q)).hexdigest();print("quick_r:",len(q),"items","took",b-a,"seconds",i[0:8])
	a=time.time();r=radixlsd_f(numc);b=time.time();j=hashlib.md5(str(r)).hexdigest();print("radixlsd_f:",len(r),"items","took",b-a,"seconds",j[0:8])
	if ((h != i) or (i != j)):
		for x in range(0, 2**18):
			sys.stderr.write(str(m[x])+" "+str(q[x])+" "+str(r[x])+"\n")
		sys.exit(0)
	print("Searching...")
	a=time.time();k=bin_search_r(m, 51966);b=time.time();print("bin_search_r:",51966 in m,"index",k,"=",m[k],"took",b-a,"seconds")
	
	print("")

if (__name__ == "__main__"):
	main()

mr.c

char **mr_data = NULL;
int mr_llen = 0;
char **mr_words = NULL;
int *mr_counts = NULL;

int mr_index(char **list, int size, char *item)
{
	int x;
	for (x = 0; x < size; ++x)
	{
		if (strcmp(list[x], item) == 0)
		{
			return x;
		}
	}
	return -1;
}

void mr_lower(char *word)
{
	while (*word != '\0')
	{
		*word = (*word | 0x60);
		++word;
	}
}

char *mr_word(char *word)
{
	int x;
	char *c = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
	char *a = NULL, *b = NULL;
	while ((a == NULL) || (b == NULL))
	{
		if (*word == '\0') { break; }
		if (a == NULL)
		{
			int f = 0;
			for (x = 0; x < 52; ++x)
			{
				if (*word == c[x]) { f = 1; }
			}
			if (f == 1) { a = word; }
		}
		else if (b == NULL)
		{
			int f = 0;
			for (x = 0; x < 52; ++x)
			{
				if (*word == c[x]) { f = 1; }
			}
			if (f == 0) { b = word; }
		}
		++word;
	}
	if (b != NULL) { *b = '\0'; }
	return a;
}

void mr_map()
{
	int x, y, z;
	long size;
	char filename[1024];
	int rw[3][2];
	char *data = NULL;
	int cmax = 1;
	
	mr_llen = 0;
	mr_words = NULL;
	mr_counts = NULL;
	
	for (x = 0; x < 3; ++x)
	{
		pipe(rw[x]);
		pid_t p = fork();
		if (p == 0)
		{
			close(rw[x][0]);
			
			bzero(filename, 1020 * sizeof(char));
			snprintf(filename, 1010, "/tmp/textfile%d.txt", x);
			FILE *f = fopen(filename, "r");
			
			fseek(f, 0, SEEK_END);
			size = ftell(f);
			fseek(f, 0, SEEK_SET);
			
			data = malloc((size + 24) * sizeof(char));
			bzero(data, (size + 20) * sizeof(char));
			fread(data, size + 10, sizeof(char), f);
			fclose(f);
			
			char *w = data;
			while ((w = mr_word(w)) != NULL)
			{
				mr_lower(w);
				//printf("[%d] (%s)\n", x, w);
				int i = mr_index(mr_words, mr_llen, w);
				if (i < 0)
				{
					mr_llen = (mr_llen + 1);
					mr_words = realloc(mr_words, mr_llen * sizeof(char *));
					mr_counts = realloc(mr_counts, mr_llen * sizeof(int));
					i = (mr_llen - 1);
					mr_words[i] = strdup(w);
					mr_counts[i] = 0;
				}
				mr_counts[i] += 1;
				cmax = max(cmax, mr_counts[i]);
				w += (strlen(w) + 1);
			}
			
			write(rw[x][1], &size, 1 * sizeof(long));
			
			for (y = 1; y <= cmax; ++y)
			{
				int f = 0;
				bzero(data, (size + 20) * sizeof(char));
				snprintf(data, (size + 10) * sizeof(char), "%d", y);
				for (z = 0; z < mr_llen; ++z)
				{
					if (mr_counts[z] == y)
					{
						strncat(data, " ", size + 10 - strlen(data));
						strncat(data, mr_words[z], size + 10 - strlen(data));
						free(mr_words[z]);
						f = 1;
					}
				}
				strncat(data, "\n", size + 10 - strlen(data));
				if (f == 1) { write(rw[x][1], data, strlen(data)); }
			}
			
			close(rw[x][1]);
			free(data);
			free(mr_counts);
			free(mr_words);
			exit(0);
		}
		else
		{
			close(rw[x][1]);
		}
	}
	
	mr_data = malloc(3 * sizeof(char *));
	for (x = 0; x < 3; ++x)
	{
		read(rw[x][0], &size, 1 * sizeof(long));
		mr_data[x] = malloc((size + 24) * sizeof(char));
		bzero(mr_data[x], (size + 20) * sizeof(char));
		char *w = mr_data[x];
		while (1)
		{
			mr_llen = read(rw[x][0], w, size + 10 - strlen(mr_data[x]));
			if (mr_llen < 1) { break; }
			w += mr_llen;
		}
		//printf("%s\n",mr_data[x]);
		close(rw[x][0]);
		wait(NULL);
	}
}

void mr_reduce()
{
	int x;
	
	mr_llen = 0;
	mr_words = NULL;
	mr_counts = NULL;
	
	for (x = 0; x < 3; ++x)
	{
		char *w = mr_data[x], *p, *q;
		int c = 0, f = 0;
		while (*w != '\0')
		{
			if (*w == '\n') { c = 0; f = 0; ++w; continue; }
			if (*w == ' ') { f += 1; ++w; continue; }
			if (f == 0) { c = ((c * 10) + (*w - '0')); ++w; continue; }
			else
			{
				p = strchr(w, ' ');
				q = strchr(w, '\n');
				if (p == NULL) { p = q; }
				else if (q != NULL) { if (q < p) { p = q; } }
				if (p != NULL)
				{
					char t = *p;
					*p = '\0';
					
					//printf("mr-add[%d]+=(%s:%d)\n", x, w, c);
					
					int i = mr_index(mr_words, mr_llen, w);
					if (i < 0)
					{
						mr_llen = (mr_llen + 1);
						mr_words = realloc(mr_words, mr_llen * sizeof(char *));
						mr_counts = realloc(mr_counts, mr_llen * sizeof(int));
						i = (mr_llen - 1);
						mr_words[i] = strdup(w);
						mr_counts[i] = 0;
					}
					mr_counts[i] += c;
					
					*p = t;
					w = p;
					continue;
				}
			}
		}
	}
}

int mr_search(char *word)
{
	int i = mr_index(mr_words, mr_llen, word);
	if (i > -1)
	{
		return mr_counts[i];
	}
	return -1;
}

ht.c

#define ht_size 65536
struct ht_data_s { char *k; void *v; struct ht_data_s *n; };
typedef struct ht_data_s ht_data_t;
ht_data_t ht_data[ht_size];
int ht_stat = 0;

unsigned long ht_hash(char *data)
{
	unsigned long i = 0, l = strlen(data), j = 0;
	unsigned long p = 0, z = l, h = 0;
	while ((j < l) || (j < 256))
	{
		unsigned int c = data[i];
		unsigned int s = (24 - ((j % 4) * 8));
		p = (p | (c << s));
		if (s == 0)
		{
			h = (h ^ (p ^ z));
			p = 0;
		}
		z = (((z+(c+j)) + (z^(c^j)) + (z*(c*j)) + (z&(c&j))) & 0xffffffff);
		i = ((i + 1) % l);
		j = (j + 1);
	}
	if (p != 0)
	{
		h = (h ^ (p ^ z));
	}
	return h;
}

ht_data_t *ht_look(unsigned long *i, long *j, char *k)
{
	if (ht_stat == 0)
	{
		bzero(ht_data, ht_size * sizeof(ht_data_t));
		ht_stat = 1;
	}
	*i = (ht_hash(k) % ht_size);
	*j = -1;
	ht_data_t *ht_item = &(ht_data[*i]);
	while ((*ht_item).k != NULL)
	{
		if (strcmp((*ht_item).k, k) == 0)
		{
			*j = (((*j) + 1) * -1);
			break;
		}
		*j = ((*j) - 1);
		if ((*ht_item).n == NULL)
		{
			break;
		}
		ht_item = (*ht_item).n;
	}
	printf("look:%lu/%ld/%s:%p/%p/%p\n", *i, *j, k, (*ht_item).k, (*ht_item).v, (*ht_item).n);
	return ht_item;
}

void *ht_g(char *k)
{
	unsigned long i; long j;
	ht_data_t *ht_item = ht_look(&i, &j, k);
	return (*ht_item).v;
}

void ht_s(char *k, void *v)
{
	unsigned long i; long j;
	ht_data_t *ht_item = ht_look(&i, &j, k);
	if (j < 0)
	{
		if ((*ht_item).k == NULL)
		{
			(*ht_item).k = strdup(k);
			(*ht_item).v = v;
			(*ht_item).n = NULL;
		}
		else
		{
			ht_data_t *ht_temp = malloc(1 * sizeof(ht_data_t));
			(*ht_temp).k = strdup(k);
			(*ht_temp).v = v;
			(*ht_temp).n = NULL;
			(*ht_item).n = ht_temp;
		}
	}
	else
	{
		if ((*ht_item).v != NULL)
		{
			free((*ht_item).v);
		}
		(*ht_item).v = v;
	}
}

sorts.c

//gcc -Wall -o3 -lm -o sorts sorts.c

int max(a, b) { if (a < b) { return b; } return a; }

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <time.h>
#include <unistd.h>

#include <sys/types.h>
#include <sys/wait.h>

#include "ht.c"
#include "mr.c"

int merge_r(int *list, int size)
{
	if (size < 2) { return 0; }
	int a[size];
	bcopy(list, a, size * sizeof(int));
	int q = (size / 2);
	int n = (size - q);
	int *l = a;
	int *r = (a + q);
	merge_r(l, q);
	merge_r(r, n);
	int i = 0, li = 0, ri = 0;
	while (i < size)
	{
		if ((li < q) && (ri < n))
		{
			if (l[li] < r[ri]) { list[i] = l[li]; i += 1; li += 1; }
			else { list[i] = r[ri]; i += 1; ri += 1; }
		}
		else
		{
			if (li >= q)
			{
				while (ri < n) { list[i] = r[ri]; i += 1; ri += 1; }
			}
			if (ri >= n)
			{
				while (li < q) { list[i] = l[li]; i += 1; li += 1; }
			}
		}
	}
	return 0;
}

int quick_r(int *list, int size)
{
	if (size < 2) { return 0; }
	int lptr = 0, pivp = (size / 2), rptr = (size - 1);
	int pivv = list[pivp];
	while (1)
	{
		if (lptr > rptr) { break; }
		while (list[lptr] < pivv) { lptr += 1; }
		while (list[rptr] > pivv) { rptr -= 1; }
		if (lptr <= rptr)
		{
			int temp = list[lptr]; list[lptr] = list[rptr]; list[rptr] = temp;
			lptr += 1; rptr -= 1;
		}
	}
	quick_r(list, rptr + 1);
	quick_r(list + lptr, size - lptr);
	return 0;
}

int radixlsd_f(int *list, int size)
{
	int i = 0, m = 1;
	int lengths[10], sizes[10];
	int **buckets = malloc(10 * sizeof(int *));
	int x, y, z;
	int d, b, p;
	bzero(sizes, 10 * sizeof(int));
	for (y = 0; y < 10; ++y) { buckets[y] = NULL; }
	while (i < m)
	{
		bzero(lengths, 10 * sizeof(int));
		x = 0; p = pow(10, i);
		while (x < size)
		{
			if ((i == 0) && (abs(list[x]) > m))
			{
				m = list[x];
			}
			b = ((list[x] / p) % 10);
			lengths[b] += 1;
			if (lengths[b] > sizes[b])
			{
				sizes[b] += 10;
				buckets[b] = realloc(buckets[b], sizes[b] * sizeof(int));
			}
			buckets[b][lengths[b] - 1] = list[x];
			x += 1;
		}
		x = 0;
		for (y = 0; y < 10; ++y)
		{
			for (z = 0; z < lengths[y]; ++z)
			{
				list[x] = buckets[y][z]; x += 1;
			}
		}
		if (i == 0)
		{
			d = 0;
			while (m != 0)
			{
				m = (m / 10);
				d += 1;
			}
			m = d;
		}
		i += 1;
	}
	return 0;
}

int bin_search_rp(int *list, int item, int lo, int hi)
{
	int mid = ((hi - lo) / 2);
	//print(lo,mid,hi,list[lo+mid])
	if (list[lo + mid] == item)
	{
		return (lo + mid);
	}
	if ((hi - lo) < 1)
	{
		return -1;
	}
	if (list[lo + mid] < item)
	{
		return bin_search_rp(list, item, lo+mid+1, hi);
	}
	return bin_search_rp(list, item, lo, hi-mid-1);
}

int bin_search_r(int *list, int size, int item)
{
	return bin_search_rp(list, item, 0, size);
}

int main(int argc, char **argv)
{
	printf("\n");
	
	int i = 0;
	char *keys[4] = { "jonnjonn", "jonnjono", "jonxjonp", "jonpjonx" };
	char temp[1028];
	for (i = 0; i < 4; ++i)
	{
		printf("key-hash:%s=0x%lx\n", keys[i], ht_hash(keys[i]));
		
		bzero(temp, 1028 * sizeof(char));
		snprintf(temp, 1024, "{uid:%s, name:Jon Chipps, uid:%d, gid:%d, home:/home/%s}", keys[i], i + 1, i + 1, keys[i]);
		ht_s(keys[i], strdup(temp));
		printf("%s=%s\n", keys[i], (char *)ht_g(keys[i]));
		
		bzero(temp, 1028 * sizeof(char));
		snprintf(temp, 1024, "{uid:%s, name:Jon Chipps, uid:%d, gid:%d, home:/home/_%s_}", keys[i], i + 1, i + 1, keys[i]);
		ht_s(keys[i], strdup(temp));
		printf("%s=%s\n", keys[i], (char *)ht_g(keys[i]));
		
		printf("\n");
	}
	
	int x, n = 262145;
	int numa[n], numb[n], numc[n];
	//printf("%d\n", n);
	srand(time(NULL));
	for (x = 0; x < n; ++x)
	{
		numa[x] = (rand() % 1000000);
		numb[x] = numa[x];
		numc[x] = numb[x];
		//printf("%d\n", numa[x]);
	}
	numa[n - 1] = 51966; numb[n - 1] = 51966; numc[n - 1] = 51966;
	
	clock_t starta = clock() / (CLOCKS_PER_SEC / 1000);
	merge_r(numa, n);
	clock_t enda = clock() / (CLOCKS_PER_SEC / 1000);
	unsigned long millisa = (enda - starta);
	printf("merge-r:%lu/ms\n", millisa);
	
	clock_t startb = clock() / (CLOCKS_PER_SEC / 1000);
	quick_r(numb, n);
	clock_t endb = clock() / (CLOCKS_PER_SEC / 1000);
	unsigned long millisb = (endb - startb);
	printf("quick-r:%lu/ms\n", millisb);
	
	clock_t startc = clock() / (CLOCKS_PER_SEC / 1000);
	radixlsd_f(numc, n);
	clock_t endc = clock() / (CLOCKS_PER_SEC / 1000);
	unsigned long millisc = (endc - startc);
	printf("radixlsd-r:%lu/ms\n", millisc);
	
	int f = bin_search_r(numa, n, 51966);
	printf("binsearch-r[%d]=%d\n", f, numa[f]);
	
	printf("\n");
	
	mr_map();
	mr_reduce();
	printf("mapreduce-search('better'):%d\n", mr_search("better"));
	
	printf("\n");
	
	for (x = 0; x < n; ++x)
	{
		fprintf(stderr, "%d %d %d\n", numa[x], numb[x], numc[x]);
	}
	
	return 0;
}

Advertisements
[long code post warning] Studying some concepts related to Google: Sorts, Searches, Hash-Tables, & Map-Reduce

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s