2006. január 10., kedd

Simple Implementation of LZW Compression/Decompression Algorithm


Problem/Question/Abstract:

How do I Compress and Decompress files using LZW Algorithm.

Answer:

Here is a simple implemntation of LZW compression/Decompression algorithm. It is not fast and compression ratio is very small. Here is the code.

unit RevLZW;

interface

uses
  sysutils, classes, dialogs, windows;

const
  tabsize: integer = 4095;
  copybyte: integer = 0;
  compbyte: integer = 1;
  endlist: integer = -1;
  nochar: integer = -2;
  empty: integer = -3;
  eofchar: integer = -4;
  bufsize: integer = 32768;
  maxstack: integer = 4096;
type
  TStringObject = record
    prevchar: integer;
    nextchar: integer;
    next: integer;
    used: boolean;
    nused: integer;
    flocked: boolean;
  end;

procedure Initialize;
procedure Terminate;
function OpenInputFile(fname: string): boolean;
function OpenOutputFile(fname: string): boolean;
function getbyte: integer;
procedure putbyte(c: integer);
procedure compress;
procedure decompress;
procedure putcode(code: integer; lbyte: boolean = false);
function getcode: integer;
function GetHashCode(prevc, nextc: integer): integer;
function findstring(prevc, nextc: integer): integer;
function MakeTableEntry(prevc: integer; nextc: integer): boolean;
procedure push(c: integer);
procedure pop(var c: integer);
procedure InitializeStringTable;

var
  fsize: integer;
  fread, fwrote: integer;
  ihandle, ohandle: integer;
  inbufpos, outbufpos: integer;
  objectid: integer;
  stringtable: array[0..4095] of TstringObject;
  inblock: array[0..65535 {32767}] of char;
  outblock: array[0..65535 {32767}] of char;
  stack: array[0..4095] of char;
  stackpointer: integer;
  rembits: integer;
  lastbyte: boolean;
  rembitcount: integer;
  lzwerr: boolean;
  imap, omap: integer;
implementation

function OpenInputFile(fname: string): boolean;
begin
  result := true;
  ihandle := fileopen(fname, fmShareExclusive or fmOpenRead);
  fsize := getfilesize(ihandle, nil);
  if fsize < 32768 then
    fileread(ihandle, inblock, fsize)
  else
    fileread(ihandle, inblock, 32768);
  if ihandle = -1 then
    result := false;
end;

function OpenOutputFile(fname: string): boolean;
begin
  result := true;
  ohandle := filecreate(fname);
  if ohandle = -1 then
    result := false;
end;

function getbyte: integer;
begin
  if inbufpos = 32768 then
  begin
    inbufpos := 0;
    fileread(ihandle, inblock, 32768);
  end;
  if fread = fsize then
    result := eofchar
  else
    result := integer(inblock[inbufpos]);
  inc(inbufpos);
  inc(fread);
end;

procedure putbyte(c: integer);
begin
  if outbufpos = 32768 then
  begin
    outbufpos := 0;
    filewrite(ohandle, outblock, 32768);
  end;
  outblock[outbufpos] := char(c);
  inc(outbufpos);
  inc(fwrote);
end;

procedure Initialize;
begin
  inbufpos := 0;
  outbufpos := 0;
  fread := 0;
  fwrote := 0;
  objectid := 0;
  stackpointer := 0;
  lastbyte := false;
  rembits := empty;
  rembitcount := 0;
  lzwerr := false;
  InitializeStringtable;
end;

procedure InitializeStringTable;
var
  i: integer;
begin
  objectid := 0;
  for i := 0 to 4095 do
  begin
    with stringtable[i] do
    begin
      if not flocked then
      begin
        prevchar := nochar;
        nextchar := nochar;
        next := endlist;
        used := false;
        nused := 0;
        flocked := false;
      end;
    end;
    if i <= 255 then
    begin
      stringtable[i].nextchar := i;
      stringtable[i].used := true;
      inc(objectid);
    end;
  end;
end;

procedure Terminate;
begin
  if outbufpos > 0 then
    filewrite(ohandle, outblock, outbufpos);
  setendoffile(ohandle);
  fileclose(ihandle);
  fileclose(ohandle);
end;

function GetHashCode(prevc, nextc: integer): integer;
var
  index, newindex: integer;
begin
  index := ((prevc shl 5) xor nextc) and tabsize;
  if not stringtable[index].used then
    result := index
  else
  begin
    while stringtable[index].next <> endlist do
      index := stringtable[index].next;
    newindex := index and tabsize;
    while stringtable[newindex].used do
      newindex := succ(newindex) and tabsize;
    stringtable[index].next := newindex;
    result := newindex;
  end;
end;

function findstring(prevc, nextc: integer): integer;
var
  index: integer;
  found: boolean;
begin
  result := endlist;
  if (prevc = nochar) and (nextc <= 255) then
    result := nextc
  else
  begin
    index := ((prevc shl 5) xor nextc) and tabsize;
    repeat
      found := (stringtable[index].prevchar = prevc) and (stringtable[index].nextchar
        = nextc);
      if not found then
        index := stringtable[index].next;
    until found or (index = endlist);
    if found then
    begin
      result := index;
      inc(stringtable[index].nused);
    end;
  end;
end;

function MakeTableEntry(prevc: integer; nextc: integer): boolean;
var
  index: integer;
begin
  result := true;
  if objectid <= tabsize then
  begin
    index := gethashcode(prevc, nextc);
    with stringtable[index] do
    begin
      prevchar := prevc;
      nextchar := nextc;
      used := true;
    end;
    inc(objectid);
    if objectid = tabsize + 1 then
      result := false;
  end;
end;

procedure putcode(code: integer; lbyte: boolean);
var
  tmpcode: integer;
begin
  if stringtable[code].prevchar = nochar then
  begin
    if rembitcount < 7 then
    begin
      tmpcode := (rembits shl (8 - rembitcount)) or (copybyte shl (7 - rembitcount))
        or ((code shr (rembitcount + 1)) and ($7F shr rembitcount));
      putbyte(tmpcode);
      inc(fwrote);
      rembits := code and ($FF shr (7 - rembitcount));
      inc(rembitcount);
    end
    else if rembitcount = 7 then
    begin
      tmpcode := (rembits shl 1) or copybyte;
      putbyte(tmpcode);
      inc(fwrote, 2);
      putbyte(code);
      rembits := empty;
      rembitcount := 0;
    end;
  end
  else
  begin
    tmpcode := (rembits shl (8 - rembitcount)) or (compbyte shl (7 - rembitcount)) or
      (code shr (5 + rembitcount) and ($7F shr rembitcount));
    putbyte(tmpcode);
    inc(fwrote);
    rembitcount := rembitcount + 5;
    if rembitcount < 8 then
      rembits := code and ($FF shr (8 - rembitcount));
    if rembitcount >= 8 then
    begin
      rembits := (code shr (rembitcount - 8)) and $FF;
      inc(fwrote);
      putbyte(rembits);
      rembitcount := rembitcount - 8;
      rembits := code and ($FF shr (8 - rembitcount));
    end;
  end;
  if lbyte and (rembitcount > 0) then
  begin
    tmpcode := ((rembits and ($FF shr (8 - rembitcount))) shl (8 - rembitcount));
    putbyte(tmpcode);
    inc(fwrote);
  end;
end;

function getcode: integer;
var
  part1, part2: integer;
  iscomp: integer;
  c1, c2: integer;
begin
  result := eofchar;
  if (fread = fsize) and (rembitcount = 0) then
  begin
    result := eofchar;
    exit;
  end;
  if rembitcount = 0 then
  begin
    part1 := getbyte;
    part2 := getbyte;
    iscomp := (part1 shr 7) and 1;
    if iscomp = 1 then
    begin
      c1 := part1 and $7F;
      c2 := (part2 shr 3) and $1F;
      rembits := part2 and $7;
      rembitcount := 3;
      result := (c1 shl 5) or c2;
    end
    else if iscomp = 0 then
    begin
      c1 := part1 and $7F;
      c2 := (part2 shr 7) and $1;
      result := (c1 shl 1) or c2;
      rembits := part2 and $7F;
      rembitcount := 7;
    end;
  end
  else if rembitcount = 1 then
  begin
    part1 := getbyte;
    iscomp := rembits;
    if iscomp = 1 then
    begin
      part2 := getbyte;
      c1 := part1 and $FF;
      c2 := (part2 shr 4) and $F;
      rembits := part2 and $F;
      rembitcount := 4;
      result := (c1 shl 4) or c2;
    end
    else if iscomp = 0 then
    begin
      c1 := part1 and $FF;
      result := c1;
      rembits := empty;
      rembitcount := 0;
    end;
  end
  else if rembitcount = 2 then
  begin
    part1 := getbyte;
    iscomp := (rembits shr 1) and 1;
    if iscomp = 1 then
    begin
      part2 := getbyte;
      c1 := ((rembits and 1) shl 7) or ((part1 shr 1) and $7F);
      c2 := ((part1 and 1) shl 3) or ((part2 shr 5) and $7);
      rembits := part2 and $1F;
      rembitcount := 5;
      result := (c1 shl 4) or (c2 and $F);
    end
    else if iscomp = 0 then
    begin
      c1 := ((rembits and 1) shl 7) or ((part1 shr 1) and $7F);
      result := c1;
      rembits := part1 and 1;
      rembitcount := 1;
    end;
  end
  else if rembitcount = 3 then
  begin
    part1 := getbyte;
    iscomp := (rembits shr 2) and 1;
    if iscomp = 1 then
    begin
      part2 := getbyte;
      c1 := ((rembits and $3) shl 6) or ((part1 shr 2) and $3F);
      c2 := ((part1 and $3) shl 2) or ((part2 shr 6) and $3);
      rembits := part2 and $3F;
      rembitcount := 6;
      result := (c1 shl 4) or (c2 and $F);
    end
    else if iscomp = 0 then
    begin
      c1 := ((rembits and $3) shl 6) or ((part1 shr 2) and $3F);
      result := c1;
      rembits := part1 and $3;
      rembitcount := 2;
    end;
  end
  else if rembitcount = 4 then
  begin
    part1 := getbyte;
    iscomp := (rembits shr 3) and 1;
    if iscomp = 1 then
    begin
      part2 := getbyte;
      c1 := ((rembits and $7) shl 5) or ((part1 shr 3) and $1F);
      c2 := ((part1 and $7) shl 1) or ((part2 shr 7) and $1);
      rembits := part2 and $7F;
      rembitcount := 7;
      result := (c1 shl 4) or (c2 and $F);
    end
    else if iscomp = 0 then
    begin
      c1 := ((rembits and $7) shl 5) or ((part1 shr 3) and $1F);
      result := c1;
      rembits := part1 and $7;
      rembitcount := 3;
    end;
  end
  else if rembitcount = 5 then
  begin
    part1 := getbyte;
    iscomp := (rembits shr 4) and 1;
    if iscomp = 1 then
    begin
      c1 := ((rembits and $F) shl 4) or ((part1 shr 4) and $F);
      c2 := part1 and $F;
      rembits := empty;
      rembitcount := 0;
      result := (c1 shl 4) or (c2 and $F);
    end
    else if iscomp = 0 then
    begin
      c1 := ((rembits and $F) shl 4) or ((part1 shr 4) and $F);
      result := c1;
      rembits := part1 and $F;
      rembitcount := 4;
    end;
  end
  else if rembitcount = 6 then
  begin
    part1 := getbyte;
    iscomp := (rembits shr 5) and 1;
    if iscomp = 1 then
    begin
      c1 := ((rembits and $1F) shl 3) or ((part1 shr 5) and $7);
      c2 := (part1 shr 1) and $F;
      rembits := part1 and 1;
      rembitcount := 1;
      result := (c1 shl 4) or (c2 and $F);
    end
    else if iscomp = 0 then
    begin
      c1 := ((rembits and $1F) shl 3) or ((part1 shr 5) and $7);
      result := c1;
      rembits := part1 and $1F;
      rembitcount := 5;
    end;
  end
  else if rembitcount = 7 then
  begin
    part1 := getbyte;
    iscomp := (rembits shr 6) and 1;
    if iscomp = 1 then
    begin
      c1 := ((rembits and $3F) shl 2) or ((part1 shr 6) and $3);
      c2 := (part1 shr 2) and $F;
      rembits := part1 and $3;
      rembitcount := 2;
      result := (c1 shl 4) or (c2 and $F);
    end
    else if iscomp = 0 then
    begin
      c1 := ((rembits and $3F) shl 2) or ((part1 shr 6) and $3);
      result := c1;
      rembits := part1 and $3F;
      rembitcount := 6;
    end;
  end;
end;

procedure compress;
var
  c, wc, w: integer;
begin
  initialize;
  c := getbyte;
  w := findstring(nochar, c);
  c := getbyte;
  while fread <= fsize - 1 do
  begin
    if lastbyte then
    begin
      putcode(w);
      lastbyte := false;
      InitializeStringtable;
      c := getbyte;
      w := findstring(nochar, c);
      c := getbyte;
    end;
    wc := findstring(w, c);
    if wc = endlist then
    begin
      lastbyte := not (MakeTableEntry(w, c));
      putcode(w);
      w := findstring(nochar, c);
    end
    else
      w := wc;
    if not lastbyte then
      c := getbyte;
  end;
  putcode(w, true);
end;

procedure decompress;
var
  unknown: boolean;
  finchar, lastchar: integer;
  code, oldcode, incode: integer;
  c, tempc: integer;
begin
  initialize;
  unknown := false;
  lastchar := empty;
  oldcode := getcode;
  code := oldcode;
  c := stringtable[code].nextchar;
  putbyte(c);
  finchar := c;
  incode := getcode;
  while incode <> eofchar do
  begin
    if lastbyte then
    begin
      lastbyte := false;
      InitializeStringTable;
      stackpointer := 0;
      unknown := false;
      lastchar := empty;
      oldcode := getcode;
      code := oldcode;
      c := stringtable[code].nextchar;
      putbyte(c);
      finchar := c;
      incode := getcode;
    end;
    code := incode;
    if not stringtable[code].used then
    begin
      lastchar := finchar;
      code := oldcode;
      unknown := true;
    end;
    while (stringtable[code].prevchar <> nochar) do
    begin
      push(stringtable[code].nextchar);
      if lzwerr = true then
        break;
      code := stringtable[code].prevchar;
    end;
    if lzwerr = true then
      break;
    finchar := stringtable[code].nextchar;
    putbyte(finchar);
    pop(tempc);
    while (tempc <> empty) do
    begin
      putbyte(tempc);
      pop(tempc);
    end;
    if unknown then
    begin
      finchar := lastchar;
      putbyte(finchar);
      unknown := false;
    end;
    lastbyte := not (maketableentry(oldcode, finchar));
    if not lastbyte then
    begin
      oldcode := incode;
      incode := getcode;
    end
  end;
end;

procedure push(c: integer);
var
  s: string;
begin
  if stackpointer < 4096 then
  begin
    inc(stackpointer);
    stack[stackpointer] := char(c);
  end;
  if stackpointer >= 4096 then
  begin
    s := 'Stack full at ' + inttostr(inbufpos);
    lzwerr := true;
    showmessage(s);
  end;
end;

procedure pop(var c: integer);
begin
  if stackpointer > 0 then
  begin
    c := integer(stack[stackpointer]);
    dec(stackpointer);
  end
  else
    c := empty;
end;

end.

To compress the file add the following code to a button

openinputfile('C:\cdidxtmp\myfile.exe');
openoutputfile('C:\cdidxtmp\myfile.bak');
initialize;
compress;

To Decompress

openinputfile('C:\cdidxtmp\myfile.bak');
openoutputfile('C:\cdidxtmp\myfile.exe');
initialize;
decompress;

Nincsenek megjegyzések:

Megjegyzés küldése