mirror of
https://github.com/ipfs/kubo.git
synced 2026-02-22 02:47:48 +08:00
up until now there has been a very annoying bug with get, we would get halting behavior. I'm not 100% sure this commit fixes it, but it should. It certainly fixes others found in the process of digging into the get / tar extractor code. (wish we could repro the bug reliably enough to make a test case). This is a much cleaner tar writer. the ad-hoc, error-prone synch for the tar reader is gone (with i believe was incorrect). it is replaced with a simple pipe and bufio. The tar logic is now in tar.Writer, which writes unixfs dag nodes into a tar archive (no need for synch here). And get's reader is constructed with DagArchive which sets up the pipe + bufio. NOTE: this commit also changes this behavior of `get`: When retrieving a single file, if the file exists, get would fail. this emulated the behavior of wget by default, which (without opts) does not overwrite if the file is there. This change makes get fail if the file is available locally. This seems more intuitive to me as expected from a unix tool-- though perhaps it should be discussed more before adopting. Everything seems to work fine, and i have not been able to reproduce the get halt bug. License: MIT Signed-off-by: Juan Batiz-Benet <juan@benet.ai>
110 lines
2.4 KiB
Go
110 lines
2.4 KiB
Go
package tar
|
|
|
|
import (
|
|
"archive/tar"
|
|
"io"
|
|
"os"
|
|
gopath "path"
|
|
fp "path/filepath"
|
|
"strings"
|
|
)
|
|
|
|
type Extractor struct {
|
|
Path string
|
|
}
|
|
|
|
func (te *Extractor) Extract(reader io.Reader) error {
|
|
tarReader := tar.NewReader(reader)
|
|
|
|
// Check if the output path already exists, so we know whether we should
|
|
// create our output with that name, or if we should put the output inside
|
|
// a preexisting directory
|
|
rootExists := true
|
|
rootIsDir := false
|
|
if stat, err := os.Stat(te.Path); err != nil && os.IsNotExist(err) {
|
|
rootExists = false
|
|
} else if err != nil {
|
|
return err
|
|
} else if stat.IsDir() {
|
|
rootIsDir = true
|
|
}
|
|
|
|
// files come recursively in order (i == 0 is root directory)
|
|
for i := 0; ; i++ {
|
|
header, err := tarReader.Next()
|
|
if err != nil && err != io.EOF {
|
|
return err
|
|
}
|
|
if header == nil || err == io.EOF {
|
|
break
|
|
}
|
|
|
|
if header.Typeflag == tar.TypeDir {
|
|
if err := te.extractDir(header, i); err != nil {
|
|
return err
|
|
}
|
|
continue
|
|
}
|
|
|
|
if err := te.extractFile(header, tarReader, i, rootExists, rootIsDir); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// outputPath returns the path at whicht o place tarPath
|
|
func (te *Extractor) outputPath(tarPath string) string {
|
|
elems := strings.Split(tarPath, "/") // break into elems
|
|
elems = elems[1:] // remove original root
|
|
|
|
path := fp.Join(elems...) // join elems
|
|
path = fp.Join(te.Path, path) // rebase on extractor root
|
|
return path
|
|
}
|
|
|
|
func (te *Extractor) extractDir(h *tar.Header, depth int) error {
|
|
path := te.outputPath(h.Name)
|
|
|
|
if depth == 0 {
|
|
// if this is the root root directory, use it as the output path for remaining files
|
|
te.Path = path
|
|
}
|
|
|
|
err := os.MkdirAll(path, 0755)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (te *Extractor) extractFile(h *tar.Header, r *tar.Reader, depth int, rootExists bool, rootIsDir bool) error {
|
|
path := te.outputPath(h.Name)
|
|
|
|
if depth == 0 { // if depth is 0, this is the only file (we aren't 'ipfs get'ing a directory)
|
|
if rootExists && rootIsDir {
|
|
// putting file inside of a root dir.
|
|
fnameo := gopath.Base(h.Name)
|
|
fnamen := fp.Base(path)
|
|
// add back original name if lost.
|
|
if fnameo != fnamen {
|
|
path = fp.Join(path, fnameo)
|
|
}
|
|
} // else if old file exists, just overwrite it.
|
|
}
|
|
|
|
file, err := os.Create(path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer file.Close()
|
|
|
|
_, err = io.Copy(file, r)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|