Huffman Lecture

Contents

Introduction to the MATLAB functions

% Build the test sequence: a 10-sample pattern over the alphabet {1,2,3}
% repeated 50 times (500 samples in total).
sig = repmat([3 3 1 3 3 3 3 3 2 3], 1, 50);

% Show the first 100 samples as red circles joined by a line.
figure(1)
plot(1:100, sig(1:100), 'or-')
title('First 100 samples of "sig"')

Define the set of data symbols and the probability associated with each element.

% Source alphabet and the probability of each symbol (same order, sums to 1).
symbols = 1:3;
p = [0.1, 0.1, 0.8];

Create the Huffman code dictionary.

% Build the Huffman dictionary for the alphabet "symbols" with probabilities "p".
% Later code reads dict{i,2} as the codeword (vector of bits) of the i-th symbol.
dict = huffmandict(symbols,p);

Encode and decode the data. Verify that the original data, sig, and the decoded data, dhsig, are identical.

% Round trip through the toolbox encoder and decoder.
hcode = huffmanenco(sig, dict);      % encoded bit stream
dhsig = huffmandeco(hcode, dict);    % symbols recovered from the bit stream

% Displays 1 when the decoded sequence reproduces the input exactly.
isequal(dhsig, sig)
ans =

     1

Compute the entropy, the theoretical average codelength of the Huffman code, and the average codelength actually obtained.

% Source entropy in bits per symbol.
% NOTE(review): the variable name "entropy" hides the Image Processing Toolbox
% function of the same name for as long as it stays in the workspace.
entropy = -sum(p.*log2(p))

% Length of each codeword, shaped like "symbols".
% Assumes row i of dict describes symbols(i) - confirm against huffmandict.
CL_symb = reshape(cellfun(@length, dict(:,2)), size(symbols));

% Theoretical average codelength: expected codeword length under p.
CL_huffman = sum(p.*CL_symb)

% Average codelength actually obtained: encoded bits per input sample.
% Computed once and reused in the summary row below.
CL_real = length(hcode)/length(sig);

% Summary: [theoretical  obtained  entropy]
[CL_huffman CL_real entropy]
entropy =

    0.9219


CL_huffman =

    1.2000


ans =

    1.2000    1.2000    0.9219

Encode the cameraman image (this may take a minute)

% Read cameraman.tif and convert the pixel data to double precision.
% NOTE(review): the rest of the script treats the image as 8-bit grayscale
% (gray levels 0..255) - confirm if another image is substituted.
InputImage =double(imread( 'cameraman.tif' ));

Number of occurrences of each gray level in the image

% Count the pixels falling in bins centered on the gray levels 0..255.
% NOTE(review): MathWorks lists hist as "not recommended"; histcounts is the
% suggested replacement where the target release provides it - confirm before switching.
hh = hist( InputImage(:),0:255);

Empirical probability of each gray level symbol in the image

% Candidate gray levels and the relative frequency of each one in the image.
symbols = 0:255;
pp = hh ./ sum(hh);

% Keep only the gray levels that actually occur (a zero probability would
% turn p*log2(p) into NaN). The same index vector is applied to both arrays
% so that the k-th symbol and the k-th probability stay paired.
ind = find(pp > 0);
SymbProbabilities = pp(ind);
ExistingSymbols = symbols(ind);

% Empirical entropy of the gray-level distribution, in bits per pixel.
H = -sum(SymbProbabilities .* log2(SymbProbabilities))
H =

    7.0097

Build the dictionary

% Huffman dictionary for the gray levels present in the image.
% Later code reads dict{i,2} as the codeword of ExistingSymbols(i).
dict = huffmandict(ExistingSymbols,SymbProbabilities);

Check the theoretical average codelength

% Length of every codeword, shaped like ExistingSymbols.
% Assumes dict holds one row per symbol, in the order of ExistingSymbols.
CL_symb = reshape(cellfun(@length, dict(:,2)), size(ExistingSymbols));

% Expected number of bits per pixel under the empirical distribution.
CL_huffman = sum(SymbProbabilities .* CL_symb)
CL_huffman =

    7.0448

Encode and decode the data.

% Time the toolbox encoder on the image, taken as one column of pixels.
tic
hcode = huffmanenco(InputImage(:), dict);
toc

% Time the toolbox decoder on the resulting bit stream.
tic
dhsig = huffmandeco(hcode, dict);
toc
Elapsed time is 5.628822 seconds.
Elapsed time is 56.214861 seconds.

Compare the theoretical average codelength with the one actually obtained

% [theoretical average codelength, encoded bits per pixel actually produced]
[CL_huffman length(hcode)/length(InputImage(:))]
ans =

    7.0448    7.0448

Check the correctness of the decoded signal

% Displays 1 when the decoded pixel column equals the original one.
isequal(InputImage(:),dhsig)
% [number of differing pixels, number of matching pixels]
[sum( InputImage(:)~=dhsig ) sum( InputImage(:)==dhsig )]
% Restore the 2-D layout using the input's own dimensions, so the script
% keeps working if a different image size is loaded.
DecodedImage = reshape(dhsig,size(InputImage));
figure(2),imagesc(DecodedImage),colormap(gray)
ans =

     1


ans =

           0       65536

A faster encoding algorithm than MATLAB's "huffmanenco"

tic
% Longest codeword length = number of rows needed to hold any codeword.
MCL = max(CL_symb);
N=length(ExistingSymbols);

% Column i of CodeExt holds the codeword of ExistingSymbols(i), top-aligned
% and padded with -1 (assumes codewords contain only 0/1, so -1 is a safe
% padding marker).
CodeExt = -1*ones(MCL,N);
for i =1:N
    CL_s  = length(dict{i,2});
    CodeExt(1:CL_s,i) = dict{i,2}(:);
end

% Map every pixel to its dictionary column in a single lookup instead of
% scanning the whole image once per symbol. A pixel without a dictionary
% entry is reported here rather than being emitted as a run of zero bits.
[found, symIdx] = ismember(InputImage(:), ExistingSymbols);
if ~all(found)
    error('%d pixel(s) have a value with no entry in the Huffman dictionary.', sum(~found));
end

% One padded codeword per pixel, in pixel order.
EncodedMatrix = CodeExt(:, symIdx);

% Reading the matrix column by column concatenates the codewords; dropping
% the -1 padding leaves the bit stream as a column vector.
EncodedStream = EncodedMatrix(EncodedMatrix ~= -1);

% Stream length, then [differing bits, matching bits] against the toolbox output.
length( EncodedStream )
[sum(  EncodedStream  ~= hcode)  sum(  EncodedStream  == hcode)]
toc
ans =

      461689


ans =

           0      461689

Elapsed time is 0.056134 seconds.

A faster decoding algorithm than MATLAB's "huffmandeco"

tic
Ne = length( EncodedStream );

% bb(k,j) is the bit at stream position j+k-1, so column j holds the MCL bits
% that start at position j. Positions past the end of the stream are padded
% with -1. The matrix is allocated once instead of being grown row by row.
bb = -1*ones(MCL,Ne);
for i = 1:MCL
    bb(i,1:Ne-i+1) = EncodedStream(i:end)';
end
% size(bb)

% bbi(j)   = dictionary row whose codeword matches the stream at position j
%            (0 when no codeword matches there).
% CL_si(i) = length of the i-th codeword.
bbi = zeros(1,Ne);
CL_si = zeros(1,N);
for i =1:N
    CodeExti = dict{i,2}(:);
    CL_s  = length(CodeExti);
    % Count matching bits down each column. The dimension is given explicitly:
    % for a 1-bit codeword the comparison is a single row, and sum() without
    % a dimension would add along that row and return one scalar.
    ind = find( sum(bb(1:CL_s,:) == CodeExti*ones(1,Ne), 1) == CL_s );
    bbi(ind) = i;
    CL_si(i) = CL_s;
end

% Walk the stream: look up the symbol that starts at the current position,
% emit it, then jump ahead by that symbol's codeword length. The number of
% symbols to decode is taken from the original image.
start = 1;
Ni = length(InputImage(:));
DecodedStr = zeros(Ni,1);
for i = 1:Ni
    j = bbi(start);
    if j == 0
        error('No codeword matches the encoded stream at bit position %d.', start);
    end
    DecodedStr(i) = ExistingSymbols(j);
    start = start + CL_si(j);
end
toc
Elapsed time is 13.827782 seconds.

Check the correctness of the decoded signal

% Displays 1 when the fast decoder reproduces the original pixel column.
isequal(DecodedStr, InputImage(:))
% [number of differing pixels, number of matching pixels]
[sum(DecodedStr ~= InputImage(:)), sum(DecodedStr == InputImage(:))]
ans =

     1


ans =

           0       65536