package services import ( "bytes" "context" "database/sql" "encoding/json" "fmt" "io" "mime" "net/http" "net/url" "path" "strings" "time" "github.com/disintegration/imaging" "golang.org/x/net/html" "git.soup.land/soup/lookbook/internal/data/image" "git.soup.land/soup/lookbook/internal/data/item" ) const thumbWidth = 480 func CreateItemFromURL(ctx context.Context, db *sql.DB, sourceURL string) (item.Row, error) { meta, err := FetchMetadata(ctx, sourceURL) if err != nil { return item.Row{}, err } row, err := item.QCreate(ctx, db, sourceURL, meta.Title, meta.Description, meta.SiteName) if err != nil { return item.Row{}, err } if err := storeImages(ctx, db, row.ID, meta); err != nil { return row, err } return row, nil } func RefreshItemFromURL(ctx context.Context, db *sql.DB, row item.Row) error { meta, err := FetchMetadata(ctx, row.SourceURL) if err != nil { return err } if err := item.QUpdateMeta(ctx, db, row.ID, meta.Title, meta.Description, meta.SiteName); err != nil { return err } if err := image.QDeleteByItem(ctx, db, row.ID); err != nil { return err } return storeImages(ctx, db, row.ID, meta) } type Metadata struct { Title string Description string SiteName string ImageURL string } func FetchMetadata(ctx context.Context, sourceURL string) (Metadata, error) { resp, err := fetchURL(ctx, sourceURL) if err != nil { return Metadata{}, err } defer resp.Body.Close() body, err := io.ReadAll(io.LimitReader(resp.Body, 8<<20)) if err != nil { return Metadata{}, err } meta := Metadata{} contentType := resp.Header.Get("Content-Type") meta.ImageURL = extractImageURL(resp.Request.URL, contentType) if strings.HasPrefix(strings.ToLower(contentType), "image/") { if meta.Title == "" { meta.Title = path.Base(resp.Request.URL.Path) } if meta.SiteName == "" { meta.SiteName = resp.Request.URL.Hostname() } return meta, nil } doc, err := html.Parse(bytes.NewReader(body)) if err != nil { return meta, nil } extractMeta(doc, &meta) if meta.Title == "" { meta.Title = titleFromDoc(doc) } if meta.ImageURL == "" { if oembed, err := fetchOEmbed(ctx, sourceURL); err == nil { if meta.Title == "" { meta.Title = oembed.Title } if meta.Description == "" { meta.Description = oembed.Description } if meta.ImageURL == "" { meta.ImageURL = oembed.ThumbnailURL } if meta.SiteName == "" { meta.SiteName = oembed.ProviderName } } } return meta, nil } type oEmbedResponse struct { Title string `json:"title"` Description string `json:"description"` ThumbnailURL string `json:"thumbnail_url"` ProviderName string `json:"provider_name"` } func fetchOEmbed(ctx context.Context, sourceURL string) (oEmbedResponse, error) { oembedURL := fmt.Sprintf("https://noembed.com/embed?url=%s", url.QueryEscape(sourceURL)) resp, err := fetchURL(ctx, oembedURL) if err != nil { return oEmbedResponse{}, err } defer resp.Body.Close() var payload oEmbedResponse if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil { return oEmbedResponse{}, err } return payload, nil } func fetchURL(ctx context.Context, rawURL string) (*http.Response, error) { req, err := http.NewRequestWithContext(ctx, http.MethodGet, rawURL, nil) if err != nil { return nil, err } req.Header.Set("User-Agent", "lookbook/1.0") client := &http.Client{Timeout: 12 * time.Second} resp, err := client.Do(req) if err != nil { return nil, err } if resp.StatusCode < 200 || resp.StatusCode >= 400 { resp.Body.Close() return nil, fmt.Errorf("fetch %s: status %d", rawURL, resp.StatusCode) } return resp, nil } func extractMeta(n *html.Node, meta *Metadata) { if n.Type == html.ElementNode && n.Data == "meta" { var property, content, name string for _, attr := range n.Attr { switch strings.ToLower(attr.Key) { case "property": property = strings.ToLower(attr.Val) case "content": content = strings.TrimSpace(attr.Val) case "name": name = strings.ToLower(attr.Val) } } if content != "" { switch property { case "og:title", "twitter:title": if meta.Title == "" { meta.Title = content } case "og:description", "twitter:description": if meta.Description == "" { meta.Description = content } case "og:site_name": if meta.SiteName == "" { meta.SiteName = content } case "og:image", "twitter:image": if meta.ImageURL == "" { meta.ImageURL = content } } if meta.Description == "" && name == "description" { meta.Description = content } } } for c := n.FirstChild; c != nil; c = c.NextSibling { extractMeta(c, meta) } } func titleFromDoc(n *html.Node) string { if n.Type == html.ElementNode && n.Data == "title" && n.FirstChild != nil { return strings.TrimSpace(n.FirstChild.Data) } for c := n.FirstChild; c != nil; c = c.NextSibling { if title := titleFromDoc(c); title != "" { return title } } return "" } func extractImageURL(baseURL *url.URL, contentType string) string { if strings.HasPrefix(strings.ToLower(contentType), "image/") { return baseURL.String() } return "" } func storeImages(ctx context.Context, db *sql.DB, itemID int64, meta Metadata) error { if meta.ImageURL == "" { return nil } resp, err := fetchURL(ctx, meta.ImageURL) if err != nil { return err } defer resp.Body.Close() payload, err := io.ReadAll(io.LimitReader(resp.Body, 16<<20)) if err != nil { return err } contentType := resp.Header.Get("Content-Type") if contentType == "" { contentType = mime.TypeByExtension(strings.ToLower(path.Ext(resp.Request.URL.Path))) } width, height, thumbBytes, thumbHeight, err := createThumb(payload) if err != nil { return err } _, err = image.QCreate(ctx, db, itemID, meta.ImageURL, contentType, payload, width, height, false) if err != nil { return err } if thumbBytes != nil { _, err = image.QCreate(ctx, db, itemID, meta.ImageURL, thumbContentType(contentType), thumbBytes, thumbWidth, thumbHeight, true) if err != nil { return err } } return nil } func createThumb(payload []byte) (int, int, []byte, int, error) { img, err := imaging.Decode(bytes.NewReader(payload)) if err != nil { return 0, 0, nil, 0, nil } bounds := img.Bounds() width := bounds.Dx() height := bounds.Dy() if width <= thumbWidth { return width, height, payload, height, nil } thumb := imaging.Resize(img, thumbWidth, 0, imaging.Lanczos) buf := new(bytes.Buffer) if err := imaging.Encode(buf, thumb, imaging.JPEG); err != nil { return width, height, nil, 0, err } return width, height, buf.Bytes(), thumb.Bounds().Dy(), nil } func thumbContentType(_ string) string { return "image/jpeg" }