lookbook/internal/embed/detect.go
soup 4763e69b79
fix: extract Twitter video thumbnails from correct API field
Twitter video thumbnails are provided by the FxTwitter API directly
in the video object's thumbnail_url field, not in a separate photos
array. This fixes thumbnail generation for Twitter video links.
2026-01-18 00:41:02 -05:00

401 lines
11 KiB
Go

package embed
import (
	"context"
	"encoding/json"
	"fmt"
	"html"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"time"
)
// Provider names the hosting platform an embed was resolved from.
type Provider string

// Supported providers.
const (
	ProviderYouTube = Provider("youtube")
	ProviderVimeo   = Provider("vimeo")
	ProviderTwitter = Provider("twitter")
)
// EmbedInfo is the metadata resolved for a single embed: a video, an image
// gallery, or a text-only post.
type EmbedInfo struct {
	Provider      Provider // platform the embed came from
	VideoID       string   // provider-specific ID (the tweet ID for Twitter)
	Title         string
	Description   string
	ThumbnailURL  string   // first/primary thumbnail, kept for backward compatibility
	ThumbnailURLs []string // every thumbnail URL (multi-image tweets)
	EmbedHTML     string   // HTML snippet that embeds the content
	VideoURL      string   // direct video URL (Twitter videos)
	MediaType     string   // "video", "images" or "text"; Twitter only
}
// URL matchers, compiled once when the package is initialised.

// youtubeRegex captures the 11-character video ID from watch, embed, /v/,
// shorts and youtu.be links.
var youtubeRegex = regexp.MustCompile(`(?:youtube\.com/(?:watch\?v=|embed/|v/|shorts/)|youtu\.be/)([a-zA-Z0-9_-]{11})`)

// vimeoRegex captures the numeric video ID from vimeo.com and
// player.vimeo.com links.
var vimeoRegex = regexp.MustCompile(`(?:vimeo\.com/(?:video/)?|player\.vimeo\.com/video/)(\d+)`)

// twitterRegex captures the account name (group 1) and the numeric status ID
// (group 2) from twitter.com and x.com status links.
var twitterRegex = regexp.MustCompile(`(?:twitter\.com|x\.com)/([^/]+)/status/(\d+)`)

// tcoRegex matches an https t.co short link together with any whitespace
// directly in front of it.
var tcoRegex = regexp.MustCompile(`\s*https://t\.co/\S+`)
// Detect inspects targetURL and, when it points at a YouTube video, a Vimeo
// video or a Twitter/X status, resolves embed metadata for it.
//
// The matchers are tried in that order and the first hit wins. A URL that
// matches none of them yields (nil, nil), so callers must nil-check the result
// even when the error is nil.
func Detect(ctx context.Context, targetURL string) (*EmbedInfo, error) {
	ytMatch := youtubeRegex.FindStringSubmatch(targetURL)
	if len(ytMatch) >= 2 {
		return fetchYouTube(ctx, ytMatch[1])
	}

	vimeoMatch := vimeoRegex.FindStringSubmatch(targetURL)
	if len(vimeoMatch) >= 2 {
		return fetchVimeo(ctx, vimeoMatch[1])
	}

	// Group 2 is the status ID; the full URL is forwarded as well because the
	// Twitter fetcher needs the account name and the original link.
	tweetMatch := twitterRegex.FindStringSubmatch(targetURL)
	if len(tweetMatch) >= 3 {
		return fetchTwitter(ctx, tweetMatch[2], targetURL)
	}

	// Not a recognised embed.
	return nil, nil
}
// fetchYouTube builds the EmbedInfo for a YouTube video ID.
//
// The thumbnail and iframe markup are derived from the ID alone. The title is
// looked up through YouTube's oEmbed endpoint on a best-effort basis: if that
// lookup fails the title is left empty and no error is reported.
func fetchYouTube(ctx context.Context, videoID string) (*EmbedInfo, error) {
	info := &EmbedInfo{
		Provider: ProviderYouTube,
		VideoID:  videoID,
		// Thumbnails are served from a predictable URL, no API call needed.
		ThumbnailURL: fmt.Sprintf("https://img.youtube.com/vi/%s/maxresdefault.jpg", videoID),
		EmbedHTML: fmt.Sprintf(
			`<iframe width="560" height="315" src="https://www.youtube.com/embed/%s" frameborder="0" allowfullscreen></iframe>`,
			videoID,
		),
	}

	watchURL := "https://www.youtube.com/watch?v=" + videoID
	endpoint := fmt.Sprintf("https://www.youtube.com/oembed?url=%s&format=json",
		url.QueryEscape(watchURL))

	meta, err := fetchOEmbed(ctx, endpoint)
	if err == nil {
		info.Title = meta.Title
	}
	return info, nil
}
// fetchVimeo builds the EmbedInfo for a Vimeo video ID.
//
// Title, description and thumbnail come from Vimeo's oEmbed endpoint; unlike
// the YouTube path, a failed oEmbed lookup is returned as an error (wrapped
// with a "vimeo oembed" prefix).
func fetchVimeo(ctx context.Context, videoID string) (*EmbedInfo, error) {
	pageURL := "https://vimeo.com/" + videoID
	endpoint := fmt.Sprintf("https://vimeo.com/api/oembed.json?url=%s",
		url.QueryEscape(pageURL))

	meta, err := fetchOEmbed(ctx, endpoint)
	if err != nil {
		return nil, fmt.Errorf("vimeo oembed: %w", err)
	}

	info := EmbedInfo{
		Provider:     ProviderVimeo,
		VideoID:      videoID,
		Title:        meta.Title,
		Description:  meta.Description,
		ThumbnailURL: meta.ThumbnailURL,
	}
	info.EmbedHTML = fmt.Sprintf(
		`<iframe src="https://player.vimeo.com/video/%s" width="640" height="360" frameborder="0" allowfullscreen></iframe>`,
		videoID,
	)
	return &info, nil
}
// fxTwitterResponse mirrors the subset of the FxTwitter API reply that this
// package reads.
type fxTwitterResponse struct {
	// Code is the status reported inside the JSON body; fetchTwitter only
	// accepts a reply whose Code is 200.
	Code    int    `json:"code"`
	Message string `json:"message"`

	Tweet struct {
		Text string `json:"text"`

		Author struct {
			Name       string `json:"name"`
			ScreenName string `json:"screen_name"`
		} `json:"author"`

		Media struct {
			Photos []struct {
				URL    string `json:"url"`
				Width  int    `json:"width"`
				Height int    `json:"height"`
			} `json:"photos"`

			// Each video carries its own thumbnail in thumbnail_url.
			Videos []struct {
				URL          string `json:"url"`
				ThumbnailURL string `json:"thumbnail_url"`
			} `json:"videos"`
		} `json:"media"`

		IsNoteTweet bool `json:"is_note_tweet"`
	} `json:"tweet"`
}
// twitterSyndicationResponse mirrors the subset of the Twitter syndication API
// reply that this package reads. It backs the fallback path used when the
// FxTwitter lookup does not succeed.
type twitterSyndicationResponse struct {
	Text string `json:"text"`

	User struct {
		Name       string `json:"name"`
		ScreenName string `json:"screen_name"`
	} `json:"user"`

	Photos []struct {
		URL    string `json:"url"`
		Width  int    `json:"width"`
		Height int    `json:"height"`
	} `json:"photos"`

	MediaDetails []struct {
		MediaURLHTTPS string `json:"media_url_https"`
		// Type is compared against "video" and "animated_gif" by the parser.
		Type string `json:"type"`

		VideoInfo struct {
			Variants []struct {
				ContentType string `json:"content_type"`
				URL         string `json:"url"`
				Bitrate     int    `json:"bitrate,omitempty"`
			} `json:"variants"`
		} `json:"video_info"`
	} `json:"mediaDetails"`

	// Video.Poster is used as a last-resort thumbnail.
	Video struct {
		Poster string `json:"poster"`
	} `json:"video"`
}
// fetchTwitter resolves embed metadata for a Twitter/X status.
//
// The FxTwitter API is tried first because it returns the full text of note
// tweets. If that lookup does not produce a usable reply for any reason, the
// Twitter syndication API is used instead (its text may be truncated for note
// tweets).
//
// tweetID is the numeric status ID; originalURL is the link the user supplied
// and must match twitterRegex, otherwise an error is returned.
func fetchTwitter(ctx context.Context, tweetID string, originalURL string) (*EmbedInfo, error) {
	// The FxTwitter endpoint is addressed by account name as well as status
	// ID, so recover the name from the original link.
	matches := twitterRegex.FindStringSubmatch(originalURL)
	if len(matches) < 3 {
		return nil, fmt.Errorf("invalid twitter URL format")
	}
	username := matches[1]

	// Best effort: the FxTwitter error is intentionally dropped and the
	// syndication API takes over.
	if fxResp, err := fetchFxTweet(ctx, username, tweetID); err == nil {
		return parseFxTwitterResponse(fxResp, tweetID, originalURL)
	}

	return fetchTwitterSyndication(ctx, tweetID, originalURL)
}

// fetchFxTweet performs one FxTwitter API lookup and decodes the reply.
//
// It returns an error for transport failures, non-200 HTTP statuses,
// undecodable bodies and replies whose embedded code is not 200. The response
// body is closed before the function returns, so no connection is held open
// while the caller runs its fallback.
func fetchFxTweet(ctx context.Context, username, tweetID string) (*fxTwitterResponse, error) {
	// username comes from a loose [^/]+ capture, so escape it before placing
	// it in the request path.
	fxURL := fmt.Sprintf("https://api.fxtwitter.com/%s/status/%s", url.PathEscape(username), tweetID)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, fxURL, nil)
	if err != nil {
		return nil, fmt.Errorf("fxtwitter request: %w", err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Lookbook/1.0)")

	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("fxtwitter: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("fxtwitter status: %d", resp.StatusCode)
	}

	var fxResp fxTwitterResponse
	if err := json.NewDecoder(resp.Body).Decode(&fxResp); err != nil {
		return nil, fmt.Errorf("fxtwitter decode: %w", err)
	}
	if fxResp.Code != 200 {
		return nil, fmt.Errorf("fxtwitter code: %d", fxResp.Code)
	}
	return &fxResp, nil
}
// parseFxTwitterResponse converts a decoded FxTwitter API reply into an
// EmbedInfo.
//
// Media selection:
//   - if the tweet has videos, the first video's URL and thumbnail_url are
//     used and MediaType is "video";
//   - otherwise, if it has photos, the first photo is the primary thumbnail,
//     all photo URLs go into ThumbnailURLs and MediaType is "images";
//   - otherwise MediaType is "text".
//
// originalURL is written into the href of the embed markup. It is HTML-escaped
// first because it is caller-supplied and only loosely matched by
// twitterRegex, so it may contain quotes or angle brackets.
//
// The returned error is always nil.
func parseFxTwitterResponse(fxResp *fxTwitterResponse, tweetID string, originalURL string) (*EmbedInfo, error) {
	tweet := &fxResp.Tweet

	var (
		thumbnailURL, videoURL string
		thumbnailURLs          []string
		mediaType              string
	)
	switch {
	case len(tweet.Media.Videos) > 0:
		video := tweet.Media.Videos[0]
		videoURL = video.URL
		thumbnailURL = video.ThumbnailURL
		mediaType = "video"
	case len(tweet.Media.Photos) > 0:
		thumbnailURL = tweet.Media.Photos[0].URL
		thumbnailURLs = make([]string, 0, len(tweet.Media.Photos))
		for _, photo := range tweet.Media.Photos {
			thumbnailURLs = append(thumbnailURLs, photo.URL)
		}
		mediaType = "images"
	default:
		mediaType = "text"
	}

	// Build embed HTML using Twitter's embed widget. The URL is escaped so it
	// cannot break out of the attribute.
	embedHTML := fmt.Sprintf(
		`<blockquote class="twitter-tweet"><a href="%s"></a></blockquote><script async src="https://platform.twitter.com/widgets.js"></script>`,
		html.EscapeString(originalURL),
	)

	title := fmt.Sprintf("@%s", tweet.Author.ScreenName)
	if tweet.Author.Name != "" {
		title = fmt.Sprintf("%s (@%s)", tweet.Author.Name, tweet.Author.ScreenName)
	}

	// Clean up the tweet text: strip every t.co short link (with the
	// whitespace in front of it), then trim the ends.
	description := strings.TrimSpace(tcoRegex.ReplaceAllString(tweet.Text, ""))

	return &EmbedInfo{
		Provider:      ProviderTwitter,
		VideoID:       tweetID,
		Title:         title,
		Description:   description,
		ThumbnailURL:  thumbnailURL,
		ThumbnailURLs: thumbnailURLs,
		VideoURL:      videoURL,
		EmbedHTML:     embedHTML,
		MediaType:     mediaType,
	}, nil
}
// fetchTwitterSyndication resolves embed metadata for a tweet through the
// Twitter syndication API. It is the fallback used when FxTwitter does not
// return a usable reply.
//
// Media selection:
//   - if the reply has photos, the first is the primary thumbnail, all photo
//     URLs go into ThumbnailURLs and MediaType is "images";
//   - otherwise the first mediaDetails entry is inspected: for "video" and
//     "animated_gif" the MP4 variant with the highest bitrate becomes VideoURL
//     and MediaType is "video"; for any other type the non-video media URLs
//     are collected and MediaType is "images" (or "text" if none has a URL);
//   - otherwise MediaType is "text".
//
// If no thumbnail was found, video.poster is used when present.
//
// originalURL is written into the href of the embed markup and is
// HTML-escaped first because it is caller-supplied.
//
// Errors are returned for request construction or transport failures, non-200
// statuses and undecodable bodies.
func fetchTwitterSyndication(ctx context.Context, tweetID string, originalURL string) (*EmbedInfo, error) {
	apiURL := fmt.Sprintf("https://cdn.syndication.twimg.com/tweet-result?id=%s&token=0", tweetID)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Lookbook/1.0)")

	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("twitter syndication: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("twitter syndication status: %d", resp.StatusCode)
	}

	var tweet twitterSyndicationResponse
	if err := json.NewDecoder(resp.Body).Decode(&tweet); err != nil {
		return nil, fmt.Errorf("twitter syndication decode: %w", err)
	}

	// Find thumbnail and video URL from media.
	var (
		thumbnailURL, videoURL string
		thumbnailURLs          []string
		mediaType              string
	)
	isVideo := func(kind string) bool {
		return kind == "video" || kind == "animated_gif"
	}

	switch {
	case len(tweet.Photos) > 0:
		thumbnailURL = tweet.Photos[0].URL
		// Collect all photo URLs for multi-image tweets.
		thumbnailURLs = make([]string, 0, len(tweet.Photos))
		for _, photo := range tweet.Photos {
			thumbnailURLs = append(thumbnailURLs, photo.URL)
		}
		mediaType = "images"

	case len(tweet.MediaDetails) > 0:
		media := &tweet.MediaDetails[0]
		thumbnailURL = media.MediaURLHTTPS

		if isVideo(media.Type) {
			// Pick the highest-bitrate MP4. ">=" lets a zero-bitrate variant
			// be selected when it is the only MP4 on offer.
			var bestBitrate int
			for _, v := range media.VideoInfo.Variants {
				if v.ContentType == "video/mp4" && v.Bitrate >= bestBitrate {
					bestBitrate = v.Bitrate
					videoURL = v.URL
				}
			}
			mediaType = "video"
			break
		}

		// Still images that only show up in mediaDetails: previously this
		// path left MediaType empty and ThumbnailURLs unset.
		for i := range tweet.MediaDetails {
			m := &tweet.MediaDetails[i]
			if isVideo(m.Type) || m.MediaURLHTTPS == "" {
				continue
			}
			thumbnailURLs = append(thumbnailURLs, m.MediaURLHTTPS)
		}
		if len(thumbnailURLs) > 0 {
			mediaType = "images"
		} else {
			mediaType = "text"
		}

	default:
		mediaType = "text"
	}

	if thumbnailURL == "" && tweet.Video.Poster != "" {
		thumbnailURL = tweet.Video.Poster
	}

	// Build embed HTML using Twitter's embed widget. The URL is escaped so it
	// cannot break out of the attribute.
	embedHTML := fmt.Sprintf(
		`<blockquote class="twitter-tweet"><a href="%s"></a></blockquote><script async src="https://platform.twitter.com/widgets.js"></script>`,
		html.EscapeString(originalURL),
	)

	title := fmt.Sprintf("@%s", tweet.User.ScreenName)
	if tweet.User.Name != "" {
		title = fmt.Sprintf("%s (@%s)", tweet.User.Name, tweet.User.ScreenName)
	}

	// Clean up the tweet text: strip every t.co short link (with the
	// whitespace in front of it), then trim the ends.
	description := strings.TrimSpace(tcoRegex.ReplaceAllString(tweet.Text, ""))

	return &EmbedInfo{
		Provider:      ProviderTwitter,
		VideoID:       tweetID,
		Title:         title,
		Description:   description,
		ThumbnailURL:  thumbnailURL,
		ThumbnailURLs: thumbnailURLs,
		VideoURL:      videoURL,
		EmbedHTML:     embedHTML,
		MediaType:     mediaType,
	}, nil
}
// oembedResponse holds the oEmbed fields this package reads.
type oembedResponse struct {
	Title        string `json:"title"`
	Description  string `json:"description"`
	ThumbnailURL string `json:"thumbnail_url"`
}

// fetchOEmbed issues a GET to oembedURL and decodes the JSON reply.
//
// It returns an error when the request cannot be built or sent, when the
// status is anything other than 200 ("oembed status: <code>"), or when the
// body is not valid JSON. Each call is bounded by a 10-second client timeout
// in addition to ctx.
func fetchOEmbed(ctx context.Context, oembedURL string) (*oembedResponse, error) {
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, oembedURL, nil)
	if err != nil {
		return nil, err
	}

	httpClient := http.Client{Timeout: 10 * time.Second}
	response, err := httpClient.Do(request)
	if err != nil {
		return nil, err
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return nil, fmt.Errorf("oembed status: %d", code)
	}

	parsed := new(oembedResponse)
	if decodeErr := json.NewDecoder(response.Body).Decode(parsed); decodeErr != nil {
		return nil, decodeErr
	}
	return parsed, nil
}
// DownloadThumbnail fetches the image at thumbnailURL.
//
// It returns the raw bytes and the value of the response's Content-Type
// header. An error is returned when the request cannot be built or sent, when
// the status is anything other than 200 ("thumbnail status: <code>"), or when
// reading the body fails part-way; in that last case no partial data is
// returned. Each call is bounded by a 30-second client timeout in addition to
// ctx.
//
// NOTE(review): the body size is not capped; callers passing untrusted URLs
// may want to bound it.
func DownloadThumbnail(ctx context.Context, thumbnailURL string) ([]byte, string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, thumbnailURL, nil)
	if err != nil {
		return nil, "", err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Lookbook/1.0)")

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, "", fmt.Errorf("thumbnail status: %d", resp.StatusCode)
	}

	// io.ReadAll reports every read error except io.EOF, so a connection that
	// drops mid-transfer surfaces as an error instead of a truncated image.
	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, "", fmt.Errorf("thumbnail read: %w", err)
	}

	return data, resp.Header.Get("Content-Type"), nil
}