Cat-Ling
diff --git a/‎README.md‎
Lines changed: 96 additions & 2 deletions b/‎README.md‎
Lines changed: 96 additions & 2 deletions
diff --git a/‎archive.go‎
Lines changed: 78 additions & 0 deletions b/‎archive.go‎
Lines changed: 78 additions & 0 deletions
@@ -1,2 +1,96 @@
-# wayback-go
-A wayback machine site downloader
+# Wayback Go Downloader
+
+A command-line tool for downloading websites from the Wayback Machine, rewritten in Go.
+
+## Overview
+
+This program is a Go port of the popular Ruby-based `wayback-machine-downloader` by hartator (available at [https://github.com/hartator/wayback-machine-downloader](https://github.com/hartator/wayback-machine-downloader)). It allows you to download all available snapshots of a given URL from the Internet Archive's Wayback Machine, saving them locally.
+
+## Features
+
+*   **Download Entire Websites:** Recursively downloads all files associated with a given URL from the Wayback Machine.
+*   **Exact URL Download:** Option to download only the exact URL provided, without following links.
+*   **Timestamp Filtering:** Specify `from` and `to` timestamps to download snapshots within a particular date range.
+*   **Regex Filtering:** Include or exclude URLs based on regular expressions.
+*   **All Timestamps:** Download all available timestamps for each file, not just the latest.
+*   **Concurrency:** Utilizes multiple threads for faster downloads.
+*   **List Only Mode:** Preview the list of files that would be downloaded in JSON format without actually downloading them.
+*   **Error Handling:** Option to download all files, even those that return errors.
+
+## Installation
+
+To install `wayback-go`, you need to have Go installed on your system (Go 1.16 or later is recommended).
+
+1.  **Clone the repository:**
+    ```bash
+    git clone https://github.com/your-username/wayback-go.git # Replace with actual repo URL
+    cd wayback-go
+    ```
+2.  **Build the executable:**
+    ```bash
+    go build -o wayback-go
+    ```
+3.  **Move to your PATH (optional):**
+    ```bash
+    sudo mv wayback-go /usr/local/bin/
+    ```
+
+## Usage
+
+```bash
+./wayback-go --url <URL> [options]
+```
+
+### Options:
+
+*   `--url <URL>`: The base URL to download from Wayback Machine (required).
+*   `--exact-url`: Download only the exact URL.
+*   `--dir <directory>`: Directory to save the downloaded files (defaults to `websites/<domain>`).
+*   `--all-timestamps`: Download all available timestamps for each file.
+*   `--from <timestamp>`: Only download snapshots captured from this timestamp onward, given as `YYYYMMDDhhmmss` (e.g., `20060102150405`).
+*   `--to <timestamp>`: Only download snapshots captured up to this timestamp, given as `YYYYMMDDhhmmss` (e.g., `20060102150405`).
+*   `--only <regex>`: Only download URLs matching this regex filter.
+*   `--exclude <regex>`: Exclude URLs matching this regex filter.
+*   `--all`: Download all files, even if they return an error.
+*   `--max-pages <number>`: Maximum number of snapshot pages to retrieve from Wayback Machine API (default: 100).
+*   `--threads <number>`: Number of concurrent download threads (default: 1).
+*   `--list`: Only list file URLs in JSON format, won't download anything.
+
+### Examples:
+
+1.  **Download a website:**
+    ```bash
+    ./wayback-go --url https://example.com
+    ```
+2.  **Download only a specific URL:**
+    ```bash
+    ./wayback-go --url https://example.com/page.html --exact-url
+    ```
+3.  **Download with a specific output directory:**
+    ```bash
+    ./wayback-go --url https://example.com --dir my_archive
+    ```
+4.  **Download snapshots from a specific date range:**
+    ```bash
+    ./wayback-go --url https://example.com --from 20200101000000 --to 20201231235959
+    ```
+5.  **List files in JSON format:**
+    ```bash
+    ./wayback-go --url https://example.com --list
+    ```
+6.  **Download with 5 concurrent threads:**
+    ```bash
+    ./wayback-go --url https://example.com --threads 5
+    ```
+7.  **Only download CSS files:**
+    ```bash
+    ./wayback-go --url https://example.com --only "\.css$"
+    ```
+
+## Contributing
+
+Contributions are welcome! Please feel free to open issues or submit pull requests.
+
+## License
+
+This project is licensed under the MIT License. See the `LICENSE` file for details.
@@ -0,0 +1,78 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"net/url"
+	"strconv"
+)
+
+// getRawListFromAPI queries the Wayback Machine CDX search endpoint for
+// targetURL and returns one FileRemoteInfo per two-column row in the reply.
+//
+// The request always asks for JSON output with the "timestamp,original"
+// field list, collapsed on digest. A "statuscode:200" filter is added unless
+// d.All is set, "from"/"to" are added when d.FromTimestamp / d.ToTimestamp
+// are non-zero, and "page" is added when pageIndex is anything other than -1.
+//
+// Failures to build the URL, perform the request, or read the body are
+// returned as wrapped errors. A body that does not decode as a JSON array of
+// string arrays is not treated as an error: an empty, non-nil slice is
+// returned with a nil error instead.
+func (d *Downloader) getRawListFromAPI(targetURL string, pageIndex int) ([]FileRemoteInfo, error) {
+	endpoint, err := url.Parse("https://web.archive.org/cdx/search/xd")
+	if err != nil {
+		return nil, fmt.Errorf("error parsing base URL: %w", err)
+	}
+
+	// Fixed part of the query. url.Values.Encode sorts by key, so the order
+	// in which entries are listed or added here does not affect the URL.
+	query := url.Values{
+		"output":   {"json"},
+		"url":      {targetURL},
+		"fl":       {"timestamp,original"},
+		"collapse": {"digest"},
+		"gzip":     {"false"},
+	}
+
+	// Optional part of the query, driven by the downloader's settings.
+	if !d.All {
+		query.Set("filter", "statuscode:200")
+	}
+	if d.FromTimestamp != 0 {
+		query.Set("from", strconv.Itoa(d.FromTimestamp))
+	}
+	if d.ToTimestamp != 0 {
+		query.Set("to", strconv.Itoa(d.ToTimestamp))
+	}
+	if pageIndex != -1 {
+		query.Set("page", strconv.Itoa(pageIndex))
+	}
+	endpoint.RawQuery = query.Encode()
+
+	resp, err := http.Get(endpoint.String())
+	if err != nil {
+		return nil, fmt.Errorf("error making API request: %w", err)
+	}
+	defer resp.Body.Close()
+
+	payload, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return nil, fmt.Errorf("error reading API response: %w", err)
+	}
+
+	var rows [][]string
+	if err := json.Unmarshal(payload, &rows); err != nil {
+		// An undecodable body (empty or malformed) is reported as
+		// "no snapshots" rather than as a failure.
+		return []FileRemoteInfo{}, nil
+	}
+
+	// Drop the leading header row when the reply carries one.
+	if len(rows) > 0 {
+		if head := rows[0]; len(head) == 2 && head[0] == "timestamp" && head[1] == "original" {
+			rows = rows[1:]
+		}
+	}
+
+	// Left nil on purpose: when no row qualifies the caller receives a nil slice.
+	var snapshots []FileRemoteInfo
+	for _, row := range rows {
+		if len(row) != 2 {
+			continue // ignore rows that are not exactly (timestamp, original)
+		}
+		snapshots = append(snapshots, FileRemoteInfo{
+			Timestamp: row[0],
+			FileURL:   row[1],
+		})
+	}
+
+	return snapshots, nil
+}