\documentclass[11pt]{article}
%\usepackage{pstricks,pst-node}
\usepackage{alltt,fullpage,graphics,color,epsfig,amsmath, amssymb,comment}
\usepackage{hyperref}
\usepackage{boxedminipage}
\usepackage{mathtools}
%\newcommand{\edgee}[1]{\begin{math}\stackrel{#1}{\longrightarrow}\end{math}}
\newcommand{\floor}[1]{\lfloor #1 \rfloor}
\newcommand{\ceil}[1]{\lceil #1 \rceil}
\DeclareMathOperator*{\E}{\mathbb{E}}
\newcommand{\eps}{\epsilon}
\newcommand{\CU}{\mathcal U}
\newcommand{\CH}{\mathcal H}
%\newcommand{\Pr}{\text{Pr}}
\begin{document}
\setlength{\parskip}{.1 in}
\begin{center}
\LARGE
\textbf{CS 473: Algorithms, Spring 2018}
\\[0.5ex]
\textbf{HW 5 (due Wednesday, March 7th at 8pm)}
\end{center}
\noindent
This homework contains three problems. {\bf Read the instructions for
submitting homework on the course webpage}.
\noindent {\bf Collaboration Policy:} For this homework, each student
may work in a group with up to three members. Only one solution for
each group needs to be submitted. Follow the submission instructions
carefully.
\begin{enumerate}
%----------------------------------------------------------------------
\item Your boss wants you to find a \textit{perfect} hash function for mapping a known set of $n$ items into
a table of size $m$. A hash function is \textit{perfect} if there are \textit{no} collisions; each of the $n$ items
is mapped to a different slot in the hash table. Of course, a perfect hash function is only possible if $m \geq n$.
After cursing your algorithms instructor for not teaching you about (this kind of) perfect hashing, you decide to try something simple: repeatedly pick {\em ideal random} hash functions until you find one that happens to be perfect.
{\em Ideal randomness} means that the hash function is chosen uniformly at random from the set of all functions from
$\CU$ to $\{0, 1, \cdots , (m-1)\}$. Intuitively, an ideal random hash function is a function $h : \mathcal{U} \to \{ 0, 1,\cdots, m-1\}$
such that for each $x\in \CU$ the value of $h(x)$ is decided by rolling an unbiased $m$-sided die.
\begin{enumerate}
\item Suppose you pick an ideal random hash function $h$. What is the \textit{exact} expected
number of pair-wise collisions, as a function of $n$ (the number of items) and $m$ (the size of the
table)? Don't worry about how to resolve collisions; just count them. For $x \neq y$, if $h(x)=h(y)$ then we say that $(x,y)$ constitute a pair-wise collision.
\item What is the \textit{exact} probability that a random hash function is perfect?
\item What is the \textit{exact} expected number of different random hash functions you have to
test before you find a perfect hash function?
\item What is the \textit{exact} probability that none of the first $N$ random hash functions you try is
perfect?
\item How many ideal random hash functions do you have to test to find a perfect hash
function \textit{with high probability}?
\end{enumerate}
\item {\em Tabulated hashing} uses tables of random numbers to compute hash values.
Suppose $|\mathcal{U}| = 2^w \times 2^w$ and $m = 2^l$, so that the items being hashed
are pairs $(x,y)$ where $x$ and $y$ are $w$-bit strings (or $2w$-bit strings broken in half), and hash values are
$l$-bit strings.
Let $A[0 \cdots 2^w-1]$ and $B[0 \cdots 2^w-1]$ be arrays of %independent random
$l$-bit strings ($A$ and $B$ can be thought of as $2^w \times l$ dimensional arrays of bits).
Define the hash function $h_{A, B} : \mathcal{U} \to [m]$ by setting
\[
h_{A, B}(x, y) \coloneqq A[x] \oplus B[y]
\]
where $\oplus$ denotes bit-wise exclusive-or. Let $\mathcal{H}'$ denote the set of all
possible functions $h_{A, B}$. Note that sampling an $h_{A, B} \in \mathcal{H}'$ uniformly
at random is equivalent to setting every bit of the arrays $A$ and $B$ to $0$ or $1$
uniformly at random.
For an integer $k>0$, we say that a family of hash functions $\CH$ mapping $\CU$ to $\{0,1,\cdots,(m-1)\}$ is {\em $k$-uniform}
if for any sequence of $k$ distinct keys and any sequence of $k$
hash values, the probability that each key maps to the corresponding hash value is $\frac{1}{m^k}$:
\[
\Pr_{h \sim \CH} \left[ \bigwedge_{j = 1}^k {h(x_j) = i_j} \right] = \frac{1}{m^k} \ \ \ \text{for all distinct } x_1, \dots, x_k \in \CU, \text{ and all } i_1, \dots, i_k \in \{0,\dots,(m-1)\}.
\]
In the above, $h \sim \CH$ means function $h$ is picked uniformly at random from family $\CH$. (For more details on $k$-uniform family of hash functions, see Jeff's notes (page 3): \url{https://courses.engr.illinois.edu/cs473/sp2016/notes/12-hashing.pdf}.)
\begin{enumerate}
\item Prove that $\mathcal{H}'$ is $2$-uniform.
\item Prove that $\mathcal{H}'$ is $3$-uniform. \textit{[Hint: Solve part $(a)$ first.]}
\item Prove that $\mathcal{H}'$ is {\em not} $4$-uniform.
\end{enumerate}
Yes, ``see part $(b)$'' is worth full credit for part $(a)$, but only if your solution to part $(b)$ is correct.
\item In lecture we discussed the Karp-Rabin randomized algorithm for
pattern matching. The power of randomization is seen by considering
the {\em two-dimensional} pattern matching problem. The input
consists of an {\em arbitrary} $n \times n$ binary matrix $T$ and an {\em arbitrary} $m \times m$
binary matrix $P$, where $m \le n$.
% NOTE(review): text appears to have been lost here in this copy -- the rest of
% the problem-3 statement was garbled together with a fragment of the
% commented-out section below; restore it from the original document.
\end{enumerate}
\begin{comment}
\begin{itemize}
\item (Fragment -- original text garbled:) \dots\ $> m/2$ is at least $0.99$.
Here $\tilde{F}[j]$ is the estimated frequency of $j$ from the sketch
and $F[j]$ is the true frequency.
\item There is another very useful sketch called the Count sketch.
You can read about it, if you are interested. \url{https://courses.engr.illinois.edu/cs598csc/fa2014/Lectures/lecture_6.pdf}
\end{itemize}
\end{comment}
\end{document}