diff --git a/internal/services/metadata.go b/internal/services/metadata.go index 502c0f4..9efe9b6 100644 --- a/internal/services/metadata.go +++ b/internal/services/metadata.go @@ -11,6 +11,7 @@ import ( "net/http" "net/url" "path" + "regexp" "strings" "time" @@ -21,6 +22,8 @@ import ( "git.soup.land/soup/lookbook/internal/data/item" ) +var tweetURLPattern = regexp.MustCompile(`^https?://(?:www\.)?(?:twitter\.com|x\.com)/([^/]+)/status/(\d+)`) + const thumbWidth = 480 func CreateItemFromURL(ctx context.Context, db *sql.DB, sourceURL string) (item.Row, error) { @@ -66,6 +69,11 @@ type Metadata struct { } func FetchMetadata(ctx context.Context, sourceURL string) (Metadata, error) { + // Check if this is a Twitter/X URL and use syndication API + if meta, ok := fetchTwitterMetadata(ctx, sourceURL); ok { + return meta, nil + } + resp, err := fetchURL(ctx, sourceURL) if err != nil { return Metadata{}, err @@ -129,6 +137,120 @@ type oEmbedResponse struct { ProviderName string `json:"provider_name"` } +// Twitter/X syndication API response structures +type twitterSyndicationResponse struct { + Text string `json:"text"` + User twitterUser `json:"user"` + Photos []twitterPhoto `json:"photos"` + Video *twitterVideo `json:"video"` + Card *twitterCard `json:"card"` + Media []twitterMediaEntry `json:"mediaDetails"` +} + +type twitterUser struct { + Name string `json:"name"` + ScreenName string `json:"screen_name"` +} + +type twitterPhoto struct { + URL string `json:"url"` +} + +type twitterVideo struct { + Poster string `json:"poster"` +} + +type twitterCard struct { + ThumbnailImageOriginal string `json:"thumbnail_image_original"` +} + +type twitterMediaEntry struct { + MediaURLHTTPS string `json:"media_url_https"` + Type string `json:"type"` +} + +// fetchTwitterMetadata attempts to fetch metadata from Twitter's syndication API +// Returns the metadata and true if successful, or empty metadata and false if not a Twitter URL or fetch failed +func fetchTwitterMetadata(ctx 
context.Context, sourceURL string) (Metadata, bool) { + matches := tweetURLPattern.FindStringSubmatch(sourceURL) + if matches == nil { + return Metadata{}, false + } + + username := matches[1] + tweetID := matches[2] + + syndicationURL := fmt.Sprintf("https://cdn.syndication.twimg.com/tweet-result?id=%s&token=0", tweetID) + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, syndicationURL, nil) + if err != nil { + return Metadata{}, false + } + + // The syndication API requires specific headers + req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") + req.Header.Set("Referer", "https://platform.twitter.com/") + + client := &http.Client{Timeout: 12 * time.Second} + resp, err := client.Do(req) + if err != nil { + return Metadata{}, false + } + defer resp.Body.Close() + + if resp.StatusCode != 200 { + return Metadata{}, false + } + + // Check content type - if it's HTML, it's an error page + contentType := resp.Header.Get("Content-Type") + if !strings.Contains(contentType, "application/json") { + return Metadata{}, false + } + + var tweet twitterSyndicationResponse + if err := json.NewDecoder(resp.Body).Decode(&tweet); err != nil { + return Metadata{}, false + } + + meta := Metadata{ + Title: truncateText(tweet.Text, 200), + Description: fmt.Sprintf("@%s", username), + SiteName: "X", + } + + // Try to get image URL from various sources + // Priority: photos > video poster > card thumbnail > mediaDetails + if len(tweet.Photos) > 0 { + meta.ImageURL = tweet.Photos[0].URL + } else if tweet.Video != nil && tweet.Video.Poster != "" { + meta.ImageURL = tweet.Video.Poster + } else if tweet.Card != nil && tweet.Card.ThumbnailImageOriginal != "" { + meta.ImageURL = tweet.Card.ThumbnailImageOriginal + } else if len(tweet.Media) > 0 { + meta.ImageURL = tweet.Media[0].MediaURLHTTPS + } + + // If we got user info, use it for a better description + if tweet.User.Name != "" { + meta.Description = fmt.Sprintf("%s (@%s)", 
// truncateText shortens s so that its content is at most maxLen bytes,
// followed by an ellipsis ("…"). A string that already fits within maxLen
// bytes is returned unchanged, without an ellipsis.
//
// The cut point is moved back to the start of a UTF-8 sequence when maxLen
// would fall inside one, so valid input never yields a split character. If a
// space is found past the midpoint of the limit, the cut is moved back to it
// so the text ends on a word boundary. A negative maxLen is treated as 0.
func truncateText(s string, maxLen int) string {
	if maxLen < 0 {
		maxLen = 0
	}
	if len(s) <= maxLen {
		return s
	}

	// len(s) > maxLen here, so s[cut] is always in range. Bytes of the form
	// 10xxxxxx are UTF-8 continuation bytes; step back until the cut sits on
	// the first byte of a character.
	cut := maxLen
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	truncated := s[:cut]

	// Try to break at a word boundary, but only if that keeps more than half
	// of the allowed length.
	if lastSpace := strings.LastIndex(truncated, " "); lastSpace > maxLen/2 {
		truncated = truncated[:lastSpace]
	}
	return truncated + "…"
}