Skip to content

Commit 9a37edc

Browse files
authored
feat(internal/fetch): remove unmatched tarball (#3062)
If a tarball is found by name in the Librarian cache directory but its SHA does not match the expected SHA, delete the tarball and fall through to redownload. Fixes #3043
1 parent d59c2a9 commit 9a37edc

2 files changed

Lines changed: 71 additions & 11 deletions

File tree

internal/fetch/cache.go

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"crypto/sha256"
1919
"fmt"
2020
"io"
21+
"log/slog"
2122
"os"
2223
"path/filepath"
2324
)
@@ -66,23 +67,29 @@ func RepoDir(repo, commit, expectedSHA256 string) (string, error) {
6667
}
6768

6869
tgz := tarballPath(cacheDir, repo, commit)
69-
outdir := filepath.Join(cacheDir, fmt.Sprintf("%s@%s", repo, commit))
70+
outDir := filepath.Join(cacheDir, fmt.Sprintf("%s@%s", repo, commit))
7071

7172
// Step 1: Check if extracted directory exists and contains files.
7273
if cached, err := extractedDir(cacheDir, repo, commit); err == nil {
7374
return cached, nil
7475
}
7576

7677
// Step 2: Check if tarball exists. Verify its SHA256 matches expectedSHA256.
77-
// If hash doesn't match, fall through to re-download.
78+
// If hash doesn't match or any error happens during the extraction, delete
79+
// the tarball and fall through to re-download.
7880
if _, err := os.Stat(tgz); err == nil {
7981
sha, err := computeSHA256(tgz)
80-
if err == nil && sha == expectedSHA256 {
81-
if err := os.MkdirAll(outdir, 0755); err != nil {
82-
return "", fmt.Errorf("failed creating %q: %w", outdir, err)
82+
if err == nil {
83+
if sha == expectedSHA256 {
84+
if err := os.MkdirAll(outDir, 0755); err != nil {
85+
return "", fmt.Errorf("failed creating %q: %w", outDir, err)
86+
}
87+
if err := ExtractTarball(tgz, outDir); err == nil {
88+
return outDir, nil
89+
}
8390
}
84-
if err := ExtractTarball(tgz, outdir); err == nil {
85-
return outdir, nil
91+
if err := os.Remove(tgz); err != nil {
92+
slog.Debug("failed to remove tarball", "path", tgz, "err", err)
8693
}
8794
}
8895
}
@@ -92,16 +99,16 @@ func RepoDir(repo, commit, expectedSHA256 string) (string, error) {
9299
if err := os.MkdirAll(filepath.Dir(tgz), 0755); err != nil {
93100
return "", fmt.Errorf("failed creating %q: %w", filepath.Dir(tgz), err)
94101
}
95-
if err := os.MkdirAll(outdir, 0755); err != nil {
96-
return "", fmt.Errorf("failed creating %q: %w", outdir, err)
102+
if err := os.MkdirAll(outDir, 0755); err != nil {
103+
return "", fmt.Errorf("failed creating %q: %w", outDir, err)
97104
}
98105
if err := DownloadTarball(tgz, sourceURL, expectedSHA256); err != nil {
99106
return "", err
100107
}
101-
if err := ExtractTarball(tgz, outdir); err != nil {
108+
if err := ExtractTarball(tgz, outDir); err != nil {
102109
return "", fmt.Errorf("failed to extract tarball: %w", err)
103110
}
104-
return outdir, nil
111+
return outDir, nil
105112
}
106113

107114
// cacheDir returns the root cache directory for librarian operations. It

internal/fetch/cache_test.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,59 @@ func TestRepoDir_TarballExists(t *testing.T) {
165165
}
166166
}
167167

168+
func TestRepoDir_MismatchTarball(t *testing.T) {
169+
cache := t.TempDir()
170+
t.Setenv(envLibrarianCache, cache)
171+
// Set up a mock web server to fetch a tarball.
172+
tarballData := createTestTarball(t, "googleapis-"+testCommit, map[string]string{
173+
"README.md": "# googleapis",
174+
"google/api/annotations.proto": "syntax = \"proto3\";",
175+
})
176+
expectedSHA := fmt.Sprintf("%x", sha256.Sum256(tarballData))
177+
178+
server := httptest.NewTLSServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
179+
if !strings.HasSuffix(r.URL.Path, "/archive/"+testCommit+".tar.gz") {
180+
http.NotFound(w, r)
181+
return
182+
}
183+
w.WriteHeader(http.StatusOK)
184+
w.Write(tarballData)
185+
}))
186+
defer server.Close()
187+
188+
defer func(t http.RoundTripper) { http.DefaultTransport = t }(http.DefaultTransport)
189+
http.DefaultTransport = server.Client().Transport
190+
// Create an empty tarball file in the cache directory.
191+
repo := strings.TrimPrefix(server.URL, "https://")
192+
downloadDir := filepath.Join(cache, "download")
193+
if err := os.MkdirAll(downloadDir, 0755); err != nil {
194+
t.Fatal(err)
195+
}
196+
tarballName := fmt.Sprintf("%s@%s.tar.gz", repo, testCommit)
197+
f, err := os.Create(filepath.Join(downloadDir, tarballName))
198+
if err != nil {
199+
t.Fatal(err)
200+
}
201+
defer f.Close()
202+
203+
got, err := RepoDir(repo, testCommit, expectedSHA)
204+
if err != nil {
205+
t.Fatal(err)
206+
}
207+
208+
if _, err := os.Stat(filepath.Join(got, "README.md")); err != nil {
209+
t.Errorf("expected README.md to exist: %v", err)
210+
}
211+
if _, err := os.Stat(filepath.Join(got, "google/api/annotations.proto")); err != nil {
212+
t.Errorf("expected google/api/annotations.proto to exist: %v", err)
213+
}
214+
215+
tarballPath := tarballPath(cache, repo, testCommit)
216+
if _, err := os.Stat(tarballPath); err != nil {
217+
t.Errorf("expected tarball to be cached at %q: %v", tarballPath, err)
218+
}
219+
}
220+
168221
func TestRepoDir_Download(t *testing.T) {
169222
cachedir := t.TempDir()
170223
t.Setenv(envLibrarianCache, cachedir)

0 commit comments

Comments
 (0)