// Package opengraph fetches web pages and extracts OpenGraph, Twitter Card
// and basic HTML metadata from them.
package opengraph

import (
	"context"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"golang.org/x/net/html"
)

const (
	// userAgent is sent with every outgoing request.
	userAgent = "Mozilla/5.0 (compatible; Lookbook/1.0)"

	// maxRedirects is the number of redirects Fetch follows before giving up.
	maxRedirects = 5

	// maxHTMLBytes caps how much of a page body is handed to the HTML parser.
	maxHTMLBytes = 1 << 20

	// maxImageBytes is the largest image DownloadImage will return.
	maxImageBytes = 50 << 20
)

// pageClient is used by Fetch. It is built once so the redirect policy and
// timeout are not re-created on every call.
var pageClient = &http.Client{
	Timeout: 10 * time.Second,
	CheckRedirect: func(req *http.Request, via []*http.Request) error {
		if len(via) >= maxRedirects {
			return fmt.Errorf("too many redirects")
		}
		return nil
	},
}

// imageClient is used by DownloadImage; it keeps net/http's default redirect
// policy and allows a longer timeout than pageClient.
var imageClient = &http.Client{Timeout: 30 * time.Second}

// Metadata contains extracted OpenGraph and meta data from a URL.
type Metadata struct {
	// Title comes from og:title, falling back to <title> or twitter:title.
	Title string
	// Description comes from og:description, twitter:description or the
	// plain "description" meta tag, whichever is seen first.
	Description string
	// ImageURL comes from og:image, og:image:url or twitter:image.
	ImageURL string
	// VideoURL comes from og:video or og:video:url.
	VideoURL string
	// SiteName comes from og:site_name.
	SiteName string
	// Type comes from og:type.
	Type string
}

// Fetch fetches and parses OpenGraph metadata from a URL.
//
// It issues a GET request bound to ctx, follows at most maxRedirects
// redirects, and requires a 200 response. Only the first maxHTMLBytes of the
// body are parsed. Relative image and video URLs found in the page are
// resolved against the URL that was actually served (after redirects).
//
// An error is returned when the request cannot be built or sent, when the
// status is not 200, or when the HTML parser fails.
func Fetch(ctx context.Context, targetURL string) (*Metadata, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, targetURL, nil)
	if err != nil {
		return nil, fmt.Errorf("create request: %w", err)
	}
	req.Header.Set("User-Agent", userAgent)

	resp, err := pageClient.Do(req)
	if err != nil {
		return nil, fmt.Errorf("fetch url: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status: %d", resp.StatusCode)
	}

	doc, err := html.Parse(io.LimitReader(resp.Body, maxHTMLBytes))
	if err != nil {
		return nil, fmt.Errorf("parse html: %w", err)
	}

	meta := &Metadata{}
	parseNode(doc, meta)

	// resp.Request is the request that produced this response, i.e. the last
	// hop of any redirect chain. Relative references in the document are
	// relative to that URL, not to the one the caller passed in.
	base := resp.Request.URL
	meta.ImageURL = resolveRef(base, meta.ImageURL)
	meta.VideoURL = resolveRef(base, meta.VideoURL)

	return meta, nil
}

// resolveRef turns ref into an absolute URL using base. Empty references,
// references that already carry a scheme, and references that fail to parse
// are returned unchanged.
func resolveRef(base *url.URL, ref string) string {
	if ref == "" || base == nil {
		return ref
	}
	u, err := url.Parse(ref)
	if err != nil || u.IsAbs() {
		return ref
	}
	return base.ResolveReference(u).String()
}

// parseNode walks the tree rooted at n depth-first and records metadata from
// every <meta> element and from the first non-empty <title> into meta.
func parseNode(n *html.Node, meta *Metadata) {
	if n.Type == html.ElementNode {
		switch n.Data {
		case "meta":
			parseMeta(n, meta)
		case "title":
			// Only take text content; an element child would otherwise
			// contribute its tag name.
			if c := n.FirstChild; meta.Title == "" && c != nil && c.Type == html.TextNode {
				meta.Title = strings.TrimSpace(c.Data)
			}
		}
	}
	for c := n.FirstChild; c != nil; c = c.NextSibling {
		parseNode(c, meta)
	}
}

// parseMeta inspects the property, name and content attributes of a single
// <meta> element and copies recognised values into meta.
//
// og:title, og:site_name and og:type overwrite any earlier value; every other
// field keeps the first value that was stored.
func parseMeta(n *html.Node, meta *Metadata) {
	var property, name, content string
	for _, attr := range n.Attr {
		switch attr.Key {
		case "property":
			property = attr.Val
		case "name":
			name = attr.Val
		case "content":
			content = attr.Val
		}
	}

	// OpenGraph properties.
	switch property {
	case "og:title":
		// og:title takes precedence over <title> and twitter:title, but an
		// empty og:title must not erase a title that was already found.
		if content != "" {
			meta.Title = content
		}
	case "og:description":
		if meta.Description == "" {
			meta.Description = content
		}
	case "og:image", "og:image:url":
		if meta.ImageURL == "" {
			meta.ImageURL = content
		}
	case "og:video", "og:video:url":
		if meta.VideoURL == "" {
			meta.VideoURL = content
		}
	case "og:site_name":
		meta.SiteName = content
	case "og:type":
		meta.Type = content
	}

	// Twitter cards and the plain description tag act as fallbacks only.
	switch name {
	case "twitter:title":
		if meta.Title == "" {
			meta.Title = content
		}
	case "twitter:description":
		if meta.Description == "" {
			meta.Description = content
		}
	case "twitter:image":
		if meta.ImageURL == "" {
			meta.ImageURL = content
		}
	case "description":
		if meta.Description == "" {
			meta.Description = content
		}
	}
}

// DownloadImage downloads an image from a URL and returns the data and content type.
//
// The response must have status 200 and a Content-Type starting with
// "image/". Bodies larger than maxImageBytes are rejected with an error
// rather than being returned truncated.
func DownloadImage(ctx context.Context, imageURL string) ([]byte, string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, imageURL, nil)
	if err != nil {
		return nil, "", fmt.Errorf("create request: %w", err)
	}
	req.Header.Set("User-Agent", userAgent)

	resp, err := imageClient.Do(req)
	if err != nil {
		return nil, "", fmt.Errorf("fetch image: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, "", fmt.Errorf("unexpected status: %d", resp.StatusCode)
	}

	contentType := resp.Header.Get("Content-Type")
	if !strings.HasPrefix(contentType, "image/") {
		return nil, "", fmt.Errorf("not an image: %s", contentType)
	}

	// Reject early when the server announces an oversized body.
	if resp.ContentLength > maxImageBytes {
		return nil, "", fmt.Errorf("image too large: %d bytes exceeds limit of %d", resp.ContentLength, maxImageBytes)
	}

	// Read one byte past the limit so an oversized body can be told apart
	// from one that is exactly maxImageBytes long.
	data, err := io.ReadAll(io.LimitReader(resp.Body, maxImageBytes+1))
	if err != nil {
		return nil, "", fmt.Errorf("read image: %w", err)
	}
	if len(data) > maxImageBytes {
		return nil, "", fmt.Errorf("image too large: exceeds limit of %d bytes", maxImageBytes)
	}

	return data, contentType, nil
}