Git stores snapshots of files.
A small Git-like tool needs only a few core ideas:
repository
objects
hashes
commits
branches
checkout

In this project, we will build a tiny version of Git. It will not be compatible with real Git. The goal is to understand the storage model.
Our tool will support:
init
add
commit
log
checkout

The Repository Directory
A real Git repository stores data in .git.
Our mini version will store data in .mini-git.
project/
file.txt
.mini-git/
objects/
index
HEAD

The objects directory stores file contents and commits.
The index file stores staged files.
The HEAD file stores the latest commit hash.
Initialize a Repository
The init command creates the repository structure.
/// Creates the `.mini-git` repository layout in the current working directory.
///
/// Ensures that `.mini-git/objects` exists and that `.mini-git/HEAD` exists.
/// HEAD is opened without truncation, so running `init` again inside an
/// existing repository keeps the commit hash already recorded there.
///
/// Errors: any failure from creating the directory tree or the HEAD file.
fn initRepo() !void {
    const cwd = std.fs.cwd();
    try cwd.makePath(".mini-git/objects");

    // `.truncate = false` creates HEAD when it is missing but leaves an
    // existing HEAD untouched. A freshly created file is already empty,
    // so there is nothing to write.
    const head = try cwd.createFile(".mini-git/HEAD", .{ .truncate = false });
    head.close();
}

This gives us a place to store objects.
Hashing Data
Git identifies objects by hash. We will do the same with SHA-256.
/// Computes the SHA-256 digest of `bytes` and writes the 32-byte result
/// into `out`.
fn hashBytes(bytes: []const u8, out: *[32]u8) void {
    const Sha256 = std.crypto.hash.sha2.Sha256;

    // Streaming form of the one-shot hash: feed all input, then finalize.
    var hasher = Sha256.init(.{});
    hasher.update(bytes);
    hasher.final(out);
}

To use the hash as a filename, encode it as hex.
/// Encodes a 32-byte hash as 64 lowercase hexadecimal characters.
///
/// The text is written into `buffer`. The returned slice aliases `buffer`
/// and is only valid for as long as `buffer` is.
fn hexHash(hash: [32]u8, buffer: *[64]u8) []const u8 {
    // `std.fmt.bytesToHex` takes (input, case) and returns the encoded
    // array by value, so the result is copied into the caller's buffer.
    buffer.* = std.fmt.bytesToHex(hash, .lower);
    return buffer;
}

A file object will be stored at:
.mini-git/objects/<hash>

Store an Object
/// Writes `data` to `.mini-git/objects/<hex sha-256 of data>` and returns
/// the raw 32-byte hash.
///
/// The object file is created (or overwritten) relative to the current
/// working directory.
///
/// Errors: formatting the path, creating the file, or writing to it.
fn storeObject(data: []const u8) ![32]u8 {
    var digest: [32]u8 = undefined;
    hashBytes(data, &digest);

    // Build the object path in stack buffers; no heap allocation is needed.
    var hex_storage: [64]u8 = undefined;
    var path_storage: [128]u8 = undefined;
    const object_path = try std.fmt.bufPrint(
        &path_storage,
        ".mini-git/objects/{s}",
        .{hexHash(digest, &hex_storage)},
    );

    const object_file = try std.fs.cwd().createFile(object_path, .{});
    defer object_file.close();
    try object_file.writeAll(data);

    return digest;
}

This stores raw bytes under their hash.
If the same file content appears twice, it gets the same hash.
That is content-addressed storage.
The Index
Before committing, Git stages files.
Our index will be a plain text file:
path hash

Example:
hello.txt 2cf24dba...
main.zig b94d27b9...

The add command reads a file, stores its content as an object, and records the path and hash in .mini-git/index.
/// Stages the file at `path`: stores its contents as an object and appends
/// a `<path> <hash>` line to `.mini-git/index`.
///
/// The file is read fully into memory (at most 10 MiB) using `allocator`;
/// that memory is released before returning.
///
/// Errors: reading the file, storing the object, or opening/writing the index.
fn addFile(allocator: std.mem.Allocator, path: []const u8) !void {
    const cwd = std.fs.cwd();

    const contents = try cwd.readFileAlloc(allocator, path, 10 * 1024 * 1024);
    defer allocator.free(contents);

    const digest = try storeObject(contents);
    var hex_storage: [64]u8 = undefined;
    const hex = hexHash(digest, &hex_storage);

    // Open without truncating and move to the end so the entry is appended.
    const index_file = try cwd.createFile(".mini-git/index", .{ .truncate = false });
    defer index_file.close();
    try index_file.seekFromEnd(0);
    try index_file.writer().print("{s} {s}\n", .{ path, hex });
}

This is simple but imperfect. If you add the same path twice, the index will contain duplicates. A better version would replace old entries.
Commit Objects
A commit records:
parent commit hash
list of staged files
message

Our commit format will be plain text:
parent <hash>
message <message>
file <path> <hash>
file <path> <hash>

Create a commit by reading the index and HEAD:
/// Reads the whole file at `path` (relative to the current working
/// directory) into memory, up to 1 MiB. The caller owns the returned slice
/// and must free it with `allocator`.
fn readSmallFile(allocator: std.mem.Allocator, path: []const u8) ![]u8 {
    return std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024);
}

/// Creates a commit object from the current index and points HEAD at it.
///
/// The commit text has the form:
///
///     parent <hash>
///     message <message>
///     file <path> <hash>
///     file <path> <hash>
///
/// Each non-empty index line (`<path> <hash>`) becomes one `file` line.
///
/// Errors:
/// - `error.InvalidCommitMessage` if `message` contains a line break, since
///   the format is line-based and extra lines could be read as `file` entries.
/// - Any error from reading the index, reading HEAD (other than the file
///   being absent), storing the object, or writing HEAD.
fn commit(allocator: std.mem.Allocator, message: []const u8) !void {
    if (std.mem.indexOfAny(u8, message, "\r\n") != null) {
        return error.InvalidCommitMessage;
    }

    // A missing HEAD means "no parent yet"; every other failure is reported
    // instead of being silently treated as an empty parent.
    const head_text: ?[]u8 = readSmallFile(allocator, ".mini-git/HEAD") catch |err| switch (err) {
        error.FileNotFound => null,
        else => return err,
    };
    defer if (head_text) |text| allocator.free(text);
    const parent: []const u8 = if (head_text) |text|
        std.mem.trim(u8, text, " \t\r\n")
    else
        "";

    const index = try readSmallFile(allocator, ".mini-git/index");
    defer allocator.free(index);

    var commit_text = std.ArrayList(u8).init(allocator);
    defer commit_text.deinit();
    const out = commit_text.writer();

    try out.print("parent {s}\nmessage {s}\n", .{ parent, message });

    // Index lines are `<path> <hash>`; commit lines need the `file ` prefix
    // so that lines starting with "file " can be found when reading it back.
    var entries = std.mem.splitScalar(u8, index, '\n');
    while (entries.next()) |entry| {
        if (entry.len == 0) continue;
        try out.print("file {s}\n", .{entry});
    }

    const hash = try storeObject(commit_text.items);
    var hex_buffer: [64]u8 = undefined;
    const name = hexHash(hash, &hex_buffer);

    const head = try std.fs.cwd().createFile(".mini-git/HEAD", .{});
    defer head.close();
    try head.writeAll(name);
}

Now the latest commit is stored in HEAD.
Show the Log
The log starts at HEAD, reads the commit object, prints it, then follows the parent.
/// Reads the object stored at `.mini-git/objects/<hash>` into memory, up to
/// 1 MiB. `hash` is the hex name of the object. The caller owns the returned
/// slice and must free it with `allocator`.
fn readObject(allocator: std.mem.Allocator, hash: []const u8) ![]u8 {
    const path = try std.fmt.allocPrint(
        allocator,
        ".mini-git/objects/{s}",
        .{hash},
    );
    defer allocator.free(path);
    return std.fs.cwd().readFileAlloc(allocator, path, 1024 * 1024);
}

/// Prints the commit chain starting at HEAD.
///
/// For each commit, prints `commit <hash>` followed by the raw commit text,
/// then follows the `parent <hash>` value on the commit's first line. The
/// walk stops when HEAD is empty, when the first line is not a `parent`
/// line, or when the parent hash is empty.
///
/// Errors: reading HEAD, reading a commit object, or allocation failure.
fn log(allocator: std.mem.Allocator) !void {
    var current = try readSmallFile(allocator, ".mini-git/HEAD");
    defer allocator.free(current);

    while (current.len > 0) {
        const commit_data = try readObject(allocator, current);
        defer allocator.free(commit_data);

        std.debug.print("commit {s}\n{s}\n", .{ current, commit_data });

        const parent_line_end = std.mem.indexOfScalar(u8, commit_data, '\n') orelse break;
        const parent_line = commit_data[0..parent_line_end];
        if (!std.mem.startsWith(u8, parent_line, "parent ")) break;

        const parent = parent_line["parent ".len..];
        if (parent.len == 0) break;

        // Copy the parent hash before releasing `current`: if the copy
        // fails, `current` is still valid for the deferred free above
        // instead of being freed twice.
        const next = try allocator.dupe(u8, parent);
        allocator.free(current);
        current = next;
    }
}

This follows the commit chain backward.
Checkout
Checkout restores files from a commit.
The commit contains lines like:
file hello.txt 2cf24dba...

For each line, read the object and write it back to the working tree.
/// Restores every file listed in the commit object named `commit_hash`.
///
/// Only lines of the form `file <path> <hash>` are used; all other lines
/// are skipped. The hash is the text after the last space on the line, so
/// paths that contain spaces are restored correctly. Missing parent
/// directories of `path` are created before the file is written.
///
/// Errors:
/// - `error.InvalidCommit` if a `file` line has no path or no hash.
/// - Any error from reading an object, creating directories, or writing
///   the restored file.
fn checkout(allocator: std.mem.Allocator, commit_hash: []const u8) !void {
    const commit_data = try readObject(allocator, commit_hash);
    defer allocator.free(commit_data);

    var lines = std.mem.splitScalar(u8, commit_data, '\n');
    while (lines.next()) |line| {
        if (!std.mem.startsWith(u8, line, "file ")) continue;

        // Split on the LAST space: hex hashes never contain spaces, paths may.
        const entry = line["file ".len..];
        const separator = std.mem.lastIndexOfScalar(u8, entry, ' ') orelse
            return error.InvalidCommit;
        const path = entry[0..separator];
        const hash = entry[separator + 1 ..];
        if (path.len == 0 or hash.len == 0) return error.InvalidCommit;

        const data = try readObject(allocator, hash);
        defer allocator.free(data);

        // A tracked file may live in a directory that no longer exists.
        if (std.fs.path.dirname(path)) |dir| {
            try std.fs.cwd().makePath(dir);
        }

        const file = try std.fs.cwd().createFile(path, .{});
        defer file.close();
        try file.writeAll(data);
    }
}

This restores the tracked files from that commit.
Command Dispatch
Now the program needs to read command-line arguments.
/// Program entry point: reads the sub-command from the command line and
/// runs the matching operation (`init`, `add`, `commit`, `log`, `checkout`).
///
/// When the command or its required argument is missing, or the command is
/// not recognised, a message is printed with `std.debug.print` and the
/// function returns normally. Errors from the operations are propagated.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer _ = gpa.deinit();
    const allocator = gpa.allocator();

    var args = try std.process.argsWithAllocator(allocator);
    defer args.deinit();
    _ = args.next(); // skip the program name

    // Field names double as the command names typed by the user.
    const Command = enum { init, add, commit, log, checkout };

    const name = args.next() orelse {
        std.debug.print("usage: mini-git <command>\n", .{});
        return;
    };
    const command = std.meta.stringToEnum(Command, name) orelse {
        std.debug.print("unknown command: {s}\n", .{name});
        return;
    };

    switch (command) {
        .init => try initRepo(),
        .add => {
            const path = args.next() orelse {
                std.debug.print("usage: mini-git add <path>\n", .{});
                return;
            };
            try addFile(allocator, path);
        },
        .commit => {
            const message = args.next() orelse {
                std.debug.print("usage: mini-git commit <message>\n", .{});
                return;
            };
            try commit(allocator, message);
        },
        .log => try log(allocator),
        .checkout => {
            const hash = args.next() orelse {
                std.debug.print("usage: mini-git checkout <hash>\n", .{});
                return;
            };
            try checkout(allocator, hash);
        },
    }
}

Try It
Create a file:
echo "hello" > hello.txt

Initialize the repository:
zig build run -- init

Add the file:
zig build run -- add hello.txt

Commit it:
zig build run -- commit "first commit"

Show the log:
zig build run -- log

Change the file:
echo "changed" > hello.txt

Add and commit again:
zig build run -- add hello.txt
zig build run -- commit "second commit"

Now log shows two commits.
Checkout an older commit by hash:
zig build run -- checkout <commit-hash>

The file content changes back to the version stored in that commit.
What This Mini Git Does Not Do
This project is intentionally small.
It does not store directories as tree objects.
It does not compress objects.
It does not deduplicate index entries.
It does not handle branches.
It does not merge.
It does not track deletions.
It does not detect dirty working trees.
It does not use Git’s real object format.
Those missing features are exactly where real Git becomes deeper.
The Core Idea
Even this small version shows the heart of Git.
Files become objects.
Objects are named by hashes.
Commits point to previous commits.
HEAD points to the latest commit.
Checkout restores files from stored objects.
That is the basic model:
working files
add
objects + index
commit
commit object
HEAD

Once this model is clear, real Git internals become much easier to read.