Pinterest-like app for saving images, videos, quotes, and embeds. Features: - Go backend with PostgreSQL, SSR templates - Console-based admin auth (login/logout via browser console) - Item types: images, videos (ffmpeg transcoding), quotes, embeds - Media stored as BLOBs in PostgreSQL - OpenGraph metadata extraction for links - Embed detection for YouTube, Vimeo, Twitter/X - Masonry grid layout, item detail pages - Tag system with filtering - Refresh metadata endpoint with change warnings - Replace media endpoint for updating item images/videos
185 lines
4.1 KiB
Go
185 lines
4.1 KiB
Go
package opengraph
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"time"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// Metadata is the set of descriptive fields collected from a page's
// OpenGraph tags, Twitter card tags, and plain HTML <title>/<meta> elements.
// A field is left as the empty string when the page does not provide it.
type Metadata struct {
	// Title and Description are the page's human-readable summary text.
	Title, Description string
	// ImageURL and VideoURL locate the preview media advertised by the page.
	ImageURL, VideoURL string
	// SiteName holds the og:site_name value; Type holds the og:type value.
	SiteName, Type string
}
|
|
|
|
// Fetch fetches and parses OpenGraph metadata from a URL.
|
|
func Fetch(ctx context.Context, targetURL string) (*Metadata, error) {
|
|
req, err := http.NewRequestWithContext(ctx, "GET", targetURL, nil)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("create request: %w", err)
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Lookbook/1.0)")
|
|
|
|
client := &http.Client{
|
|
Timeout: 10 * time.Second,
|
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
|
if len(via) >= 5 {
|
|
return fmt.Errorf("too many redirects")
|
|
}
|
|
return nil
|
|
},
|
|
}
|
|
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("fetch url: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("unexpected status: %d", resp.StatusCode)
|
|
}
|
|
|
|
// Limit response body to 1MB
|
|
body := io.LimitReader(resp.Body, 1<<20)
|
|
|
|
doc, err := html.Parse(body)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parse html: %w", err)
|
|
}
|
|
|
|
meta := &Metadata{}
|
|
parseNode(doc, meta)
|
|
|
|
// Resolve relative URLs
|
|
baseURL, _ := url.Parse(targetURL)
|
|
if meta.ImageURL != "" && !strings.HasPrefix(meta.ImageURL, "http") {
|
|
if imgURL, err := baseURL.Parse(meta.ImageURL); err == nil {
|
|
meta.ImageURL = imgURL.String()
|
|
}
|
|
}
|
|
if meta.VideoURL != "" && !strings.HasPrefix(meta.VideoURL, "http") {
|
|
if vidURL, err := baseURL.Parse(meta.VideoURL); err == nil {
|
|
meta.VideoURL = vidURL.String()
|
|
}
|
|
}
|
|
|
|
return meta, nil
|
|
}
|
|
|
|
func parseNode(n *html.Node, meta *Metadata) {
|
|
if n.Type == html.ElementNode {
|
|
switch n.Data {
|
|
case "meta":
|
|
parseMeta(n, meta)
|
|
case "title":
|
|
if meta.Title == "" && n.FirstChild != nil {
|
|
meta.Title = strings.TrimSpace(n.FirstChild.Data)
|
|
}
|
|
}
|
|
}
|
|
|
|
for c := n.FirstChild; c != nil; c = c.NextSibling {
|
|
parseNode(c, meta)
|
|
}
|
|
}
|
|
|
|
func parseMeta(n *html.Node, meta *Metadata) {
|
|
var property, name, content string
|
|
for _, attr := range n.Attr {
|
|
switch attr.Key {
|
|
case "property":
|
|
property = attr.Val
|
|
case "name":
|
|
name = attr.Val
|
|
case "content":
|
|
content = attr.Val
|
|
}
|
|
}
|
|
|
|
// OpenGraph properties
|
|
switch property {
|
|
case "og:title":
|
|
meta.Title = content
|
|
case "og:description":
|
|
if meta.Description == "" {
|
|
meta.Description = content
|
|
}
|
|
case "og:image":
|
|
if meta.ImageURL == "" {
|
|
meta.ImageURL = content
|
|
}
|
|
case "og:video", "og:video:url":
|
|
if meta.VideoURL == "" {
|
|
meta.VideoURL = content
|
|
}
|
|
case "og:site_name":
|
|
meta.SiteName = content
|
|
case "og:type":
|
|
meta.Type = content
|
|
}
|
|
|
|
// Twitter cards
|
|
switch name {
|
|
case "twitter:title":
|
|
if meta.Title == "" {
|
|
meta.Title = content
|
|
}
|
|
case "twitter:description":
|
|
if meta.Description == "" {
|
|
meta.Description = content
|
|
}
|
|
case "twitter:image":
|
|
if meta.ImageURL == "" {
|
|
meta.ImageURL = content
|
|
}
|
|
case "description":
|
|
if meta.Description == "" {
|
|
meta.Description = content
|
|
}
|
|
}
|
|
}
|
|
|
|
// DownloadImage fetches imageURL and returns the response body together
// with the Content-Type header value exactly as sent by the server.
//
// The request is bound to ctx and to a 30 second client timeout. An error is
// returned when the request cannot be built or sent, when the status is not
// 200 OK, when the Content-Type does not start with "image/" (compared
// case-insensitively), when reading the body fails, or when the image is
// larger than 50 MiB. Oversized images are rejected rather than truncated.
func DownloadImage(ctx context.Context, imageURL string) ([]byte, string, error) {
	// Upper bound on the number of body bytes accepted (50 MiB).
	const maxImageBytes = 50 << 20

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, imageURL, nil)
	if err != nil {
		return nil, "", fmt.Errorf("create request: %w", err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Lookbook/1.0)")

	client := &http.Client{Timeout: 30 * time.Second}

	resp, err := client.Do(req)
	if err != nil {
		return nil, "", fmt.Errorf("fetch image: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, "", fmt.Errorf("unexpected status: %d", resp.StatusCode)
	}

	contentType := resp.Header.Get("Content-Type")
	// Media type names are case-insensitive, so compare in lower case.
	if !strings.HasPrefix(strings.ToLower(contentType), "image/") {
		return nil, "", fmt.Errorf("not an image: %s", contentType)
	}

	// Fail fast when the server declares a size over the limit.
	if resp.ContentLength > maxImageBytes {
		return nil, "", fmt.Errorf("image too large: %d bytes exceeds limit of %d", resp.ContentLength, maxImageBytes)
	}

	// Read one byte past the limit so an oversized body can be told apart
	// from one that is exactly at the limit, instead of silently truncating.
	data, err := io.ReadAll(io.LimitReader(resp.Body, maxImageBytes+1))
	if err != nil {
		return nil, "", fmt.Errorf("read image: %w", err)
	}
	if len(data) > maxImageBytes {
		return nil, "", fmt.Errorf("image too large: exceeds limit of %d bytes", maxImageBytes)
	}

	return data, contentType, nil
}
|