- Use FxTwitter API for full note tweet text (with syndication API fallback)
- Save Twitter posts based on media content:
  - Videos → embed type (proxied video)
  - Images → image type (gallery)
  - Text-only → quote type
- Add granular preview badges: 'X VIDEO', 'X GALLERY', 'X POST'
- Preserve formatting/spacing with white-space: pre-wrap for quotes and descriptions
- Rename VideoInfo to EmbedInfo for better semantic clarity
403 lines
12 KiB
Go
403 lines
12 KiB
Go
package embed
|
|
|
|
import (
	"context"
	"encoding/json"
	"fmt"
	"html"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"time"
)
|
|
|
|
// Provider identifies the platform an embed was resolved from. It is stored
// on EmbedInfo so callers can tell YouTube, Vimeo and Twitter/X results apart.
type Provider string
|
|
|
|
// Providers that Detect can recognise.
const (
	// ProviderYouTube is set on embeds built by fetchYouTube.
	ProviderYouTube Provider = "youtube"
	// ProviderVimeo is set on embeds built by fetchVimeo.
	ProviderVimeo Provider = "vimeo"
	// ProviderTwitter is set on embeds built from Twitter/X posts.
	ProviderTwitter Provider = "twitter"
)
|
|
|
|
// EmbedInfo contains information about an embed (video, image gallery, or text).
//
// Which fields are populated depends on the provider: YouTube results carry
// no Description, and ThumbnailURLs, VideoURL and MediaType are only filled
// in for Twitter/X posts.
type EmbedInfo struct {
	Provider      Provider // Platform the embed was resolved from
	VideoID       string   // Provider-side ID: the video ID for YouTube/Vimeo, the tweet ID for Twitter
	Title         string   // oEmbed title for YouTube/Vimeo; "Name (@screen_name)" or "@screen_name" for Twitter
	Description   string   // Vimeo oEmbed description, or the tweet text with t.co links removed
	ThumbnailURL  string   // First/primary thumbnail (for backward compatibility)
	ThumbnailURLs []string // All thumbnail URLs (for multi-image tweets)
	EmbedHTML     string   // Ready-made markup: an <iframe> player, or Twitter's blockquote + widgets.js snippet
	VideoURL      string   // Direct video URL (for Twitter videos)
	MediaType     string   // "video", "images", "text" - for Twitter only
}
|
|
|
|
// URL patterns used by Detect to recognise supported links, plus the pattern
// used to tidy tweet text. All of them are compiled once at package load.
var (
	// youtubeRegex captures the 11-character video ID from youtube.com
	// watch/embed/v/shorts URLs and from youtu.be short links.
	youtubeRegex = regexp.MustCompile(`(?:youtube\.com/(?:watch\?v=|embed/|v/|shorts/)|youtu\.be/)([a-zA-Z0-9_-]{11})`)
	// vimeoRegex captures the numeric video ID from vimeo.com and
	// player.vimeo.com URLs.
	vimeoRegex = regexp.MustCompile(`(?:vimeo\.com/(?:video/)?|player\.vimeo\.com/video/)(\d+)`)
	// twitterRegex captures the username (group 1) and the numeric status ID
	// (group 2) from twitter.com / x.com status URLs.
	// NOTE(review): the host is not anchored, so any host that merely ends in
	// "twitter.com" or "x.com" also matches — confirm whether that is intended.
	twitterRegex = regexp.MustCompile(`(?:twitter\.com|x\.com)/([^/]+)/status/(\d+)`)
	// tcoRegex matches an https://t.co/ short link together with any
	// whitespace directly in front of it.
	tcoRegex = regexp.MustCompile(`\s*https://t\.co/\S+`)
)
|
|
|
|
// Detect checks if a URL is a YouTube, Vimeo, or Twitter/X post and returns its info.
|
|
func Detect(ctx context.Context, targetURL string) (*EmbedInfo, error) {
|
|
// Try YouTube
|
|
if matches := youtubeRegex.FindStringSubmatch(targetURL); len(matches) > 1 {
|
|
return fetchYouTube(ctx, matches[1])
|
|
}
|
|
|
|
// Try Vimeo
|
|
if matches := vimeoRegex.FindStringSubmatch(targetURL); len(matches) > 1 {
|
|
return fetchVimeo(ctx, matches[1])
|
|
}
|
|
|
|
// Try Twitter/X
|
|
if matches := twitterRegex.FindStringSubmatch(targetURL); len(matches) > 2 {
|
|
return fetchTwitter(ctx, matches[2], targetURL)
|
|
}
|
|
|
|
return nil, nil // Not a recognized embed
|
|
}
|
|
|
|
func fetchYouTube(ctx context.Context, videoID string) (*EmbedInfo, error) {
|
|
// YouTube thumbnails are available without API
|
|
thumbnailURL := fmt.Sprintf("https://img.youtube.com/vi/%s/maxresdefault.jpg", videoID)
|
|
|
|
// Try to get metadata via oEmbed
|
|
oembedURL := fmt.Sprintf("https://www.youtube.com/oembed?url=%s&format=json",
|
|
url.QueryEscape("https://www.youtube.com/watch?v="+videoID))
|
|
|
|
var title string
|
|
if meta, err := fetchOEmbed(ctx, oembedURL); err == nil {
|
|
title = meta.Title
|
|
}
|
|
|
|
embedHTML := fmt.Sprintf(
|
|
`<iframe width="560" height="315" src="https://www.youtube.com/embed/%s" frameborder="0" allowfullscreen></iframe>`,
|
|
videoID,
|
|
)
|
|
|
|
return &EmbedInfo{
|
|
Provider: ProviderYouTube,
|
|
VideoID: videoID,
|
|
Title: title,
|
|
ThumbnailURL: thumbnailURL,
|
|
EmbedHTML: embedHTML,
|
|
}, nil
|
|
}
|
|
|
|
func fetchVimeo(ctx context.Context, videoID string) (*EmbedInfo, error) {
|
|
oembedURL := fmt.Sprintf("https://vimeo.com/api/oembed.json?url=%s",
|
|
url.QueryEscape("https://vimeo.com/"+videoID))
|
|
|
|
meta, err := fetchOEmbed(ctx, oembedURL)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("vimeo oembed: %w", err)
|
|
}
|
|
|
|
embedHTML := fmt.Sprintf(
|
|
`<iframe src="https://player.vimeo.com/video/%s" width="640" height="360" frameborder="0" allowfullscreen></iframe>`,
|
|
videoID,
|
|
)
|
|
|
|
return &EmbedInfo{
|
|
Provider: ProviderVimeo,
|
|
VideoID: videoID,
|
|
Title: meta.Title,
|
|
Description: meta.Description,
|
|
ThumbnailURL: meta.ThumbnailURL,
|
|
EmbedHTML: embedHTML,
|
|
}, nil
|
|
}
|
|
|
|
// fxTwitterAuthor is the "author" object of an FxTwitter tweet.
type fxTwitterAuthor struct {
	Name       string `json:"name"`
	ScreenName string `json:"screen_name"`
}

// fxTwitterPhoto is one entry of the "photos" array of an FxTwitter tweet.
type fxTwitterPhoto struct {
	URL    string `json:"url"`
	Width  int    `json:"width"`
	Height int    `json:"height"`
}

// fxTwitterVideo is one entry of the "videos" array of an FxTwitter tweet.
type fxTwitterVideo struct {
	URL string `json:"url"`
}

// fxTwitterMedia is the "media" object of an FxTwitter tweet.
type fxTwitterMedia struct {
	Photos []fxTwitterPhoto `json:"photos"`
	Videos []fxTwitterVideo `json:"videos"`
}

// fxTwitterTweet is the "tweet" object of an FxTwitter API response.
type fxTwitterTweet struct {
	Text        string          `json:"text"`
	Author      fxTwitterAuthor `json:"author"`
	Media       fxTwitterMedia  `json:"media"`
	IsNoteTweet bool            `json:"is_note_tweet"`
}

// fxTwitterResponse represents the FxTwitter API response. Only the fields
// this package reads are declared; everything else in the JSON is ignored.
type fxTwitterResponse struct {
	Code    int            `json:"code"`
	Message string         `json:"message"`
	Tweet   fxTwitterTweet `json:"tweet"`
}
|
|
|
|
// syndicationUser is the "user" object of a syndication tweet result.
type syndicationUser struct {
	Name       string `json:"name"`
	ScreenName string `json:"screen_name"`
}

// syndicationPhoto is one entry of the "photos" array.
type syndicationPhoto struct {
	URL    string `json:"url"`
	Width  int    `json:"width"`
	Height int    `json:"height"`
}

// syndicationVideoVariant is one encoding listed under "video_info.variants".
type syndicationVideoVariant struct {
	ContentType string `json:"content_type"`
	URL         string `json:"url"`
	Bitrate     int    `json:"bitrate,omitempty"`
}

// syndicationVideoInfo is the "video_info" object of a media entry.
type syndicationVideoInfo struct {
	Variants []syndicationVideoVariant `json:"variants"`
}

// syndicationMediaDetail is one entry of the "mediaDetails" array.
type syndicationMediaDetail struct {
	MediaURLHTTPS string               `json:"media_url_https"`
	Type          string               `json:"type"`
	VideoInfo     syndicationVideoInfo `json:"video_info"`
}

// syndicationVideo is the top-level "video" object.
type syndicationVideo struct {
	Poster string `json:"poster"`
}

// twitterSyndicationResponse represents the Twitter syndication API response
// (fallback). Only the fields this package reads are declared.
type twitterSyndicationResponse struct {
	Text         string                   `json:"text"`
	User         syndicationUser          `json:"user"`
	Photos       []syndicationPhoto       `json:"photos"`
	MediaDetails []syndicationMediaDetail `json:"mediaDetails"`
	Video        syndicationVideo         `json:"video"`
}
|
|
|
|
func fetchTwitter(ctx context.Context, tweetID string, originalURL string) (*EmbedInfo, error) {
|
|
// Extract username from URL for FxTwitter API
|
|
matches := twitterRegex.FindStringSubmatch(originalURL)
|
|
if len(matches) < 3 {
|
|
return nil, fmt.Errorf("invalid twitter URL format")
|
|
}
|
|
username := matches[1]
|
|
|
|
// Try FxTwitter API first (supports full note tweets)
|
|
fxURL := fmt.Sprintf("https://api.fxtwitter.com/%s/status/%s", username, tweetID)
|
|
req, err := http.NewRequestWithContext(ctx, "GET", fxURL, nil)
|
|
if err == nil {
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Lookbook/1.0)")
|
|
client := &http.Client{Timeout: 10 * time.Second}
|
|
resp, err := client.Do(req)
|
|
if err == nil {
|
|
defer resp.Body.Close()
|
|
if resp.StatusCode == http.StatusOK {
|
|
var fxResp fxTwitterResponse
|
|
if err := json.NewDecoder(resp.Body).Decode(&fxResp); err == nil && fxResp.Code == 200 {
|
|
// Successfully got data from FxTwitter
|
|
return parseFxTwitterResponse(&fxResp, tweetID, originalURL)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Fallback to syndication API (may have truncated text for note tweets)
|
|
return fetchTwitterSyndication(ctx, tweetID, originalURL)
|
|
}
|
|
|
|
func parseFxTwitterResponse(fxResp *fxTwitterResponse, tweetID string, originalURL string) (*EmbedInfo, error) {
|
|
tweet := &fxResp.Tweet
|
|
|
|
// Collect media
|
|
var thumbnailURL, videoURL string
|
|
var thumbnailURLs []string
|
|
var mediaType string
|
|
|
|
if len(tweet.Media.Videos) > 0 {
|
|
videoURL = tweet.Media.Videos[0].URL
|
|
mediaType = "video"
|
|
// Videos usually have a poster/thumbnail in photos
|
|
if len(tweet.Media.Photos) > 0 {
|
|
thumbnailURL = tweet.Media.Photos[0].URL
|
|
}
|
|
} else if len(tweet.Media.Photos) > 0 {
|
|
thumbnailURL = tweet.Media.Photos[0].URL
|
|
for _, photo := range tweet.Media.Photos {
|
|
thumbnailURLs = append(thumbnailURLs, photo.URL)
|
|
}
|
|
mediaType = "images"
|
|
} else {
|
|
mediaType = "text"
|
|
}
|
|
|
|
// Build embed HTML using Twitter's embed widget
|
|
embedHTML := fmt.Sprintf(
|
|
`<blockquote class="twitter-tweet"><a href="%s"></a></blockquote><script async src="https://platform.twitter.com/widgets.js"></script>`,
|
|
originalURL,
|
|
)
|
|
|
|
title := fmt.Sprintf("@%s", tweet.Author.ScreenName)
|
|
if tweet.Author.Name != "" {
|
|
title = fmt.Sprintf("%s (@%s)", tweet.Author.Name, tweet.Author.ScreenName)
|
|
}
|
|
|
|
// Clean up tweet text - remove trailing t.co URLs
|
|
description := tcoRegex.ReplaceAllString(tweet.Text, "")
|
|
description = strings.TrimSpace(description)
|
|
|
|
return &EmbedInfo{
|
|
Provider: ProviderTwitter,
|
|
VideoID: tweetID,
|
|
Title: title,
|
|
Description: description,
|
|
ThumbnailURL: thumbnailURL,
|
|
ThumbnailURLs: thumbnailURLs,
|
|
VideoURL: videoURL,
|
|
EmbedHTML: embedHTML,
|
|
MediaType: mediaType,
|
|
}, nil
|
|
}
|
|
|
|
func fetchTwitterSyndication(ctx context.Context, tweetID string, originalURL string) (*EmbedInfo, error) {
|
|
apiURL := fmt.Sprintf("https://cdn.syndication.twimg.com/tweet-result?id=%s&token=0", tweetID)
|
|
|
|
req, err := http.NewRequestWithContext(ctx, "GET", apiURL, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Lookbook/1.0)")
|
|
|
|
client := &http.Client{Timeout: 10 * time.Second}
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("twitter syndication: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("twitter syndication status: %d", resp.StatusCode)
|
|
}
|
|
|
|
var tweet twitterSyndicationResponse
|
|
if err := json.NewDecoder(resp.Body).Decode(&tweet); err != nil {
|
|
return nil, fmt.Errorf("twitter syndication decode: %w", err)
|
|
}
|
|
|
|
// Find thumbnail and video URL from media
|
|
var thumbnailURL, videoURL string
|
|
var thumbnailURLs []string
|
|
var mediaType string
|
|
|
|
if len(tweet.Photos) > 0 {
|
|
thumbnailURL = tweet.Photos[0].URL
|
|
// Collect all photo URLs for multi-image tweets
|
|
for _, photo := range tweet.Photos {
|
|
thumbnailURLs = append(thumbnailURLs, photo.URL)
|
|
}
|
|
mediaType = "images"
|
|
} else if len(tweet.MediaDetails) > 0 {
|
|
media := tweet.MediaDetails[0]
|
|
thumbnailURL = media.MediaURLHTTPS
|
|
|
|
// Extract video URL - find highest bitrate MP4
|
|
if media.Type == "video" || media.Type == "animated_gif" {
|
|
var bestBitrate int
|
|
for _, v := range media.VideoInfo.Variants {
|
|
if v.ContentType == "video/mp4" && v.Bitrate >= bestBitrate {
|
|
bestBitrate = v.Bitrate
|
|
videoURL = v.URL
|
|
}
|
|
}
|
|
mediaType = "video"
|
|
}
|
|
} else {
|
|
mediaType = "text"
|
|
}
|
|
|
|
if thumbnailURL == "" && tweet.Video.Poster != "" {
|
|
thumbnailURL = tweet.Video.Poster
|
|
}
|
|
|
|
// Build embed HTML using Twitter's embed widget
|
|
embedHTML := fmt.Sprintf(
|
|
`<blockquote class="twitter-tweet"><a href="%s"></a></blockquote><script async src="https://platform.twitter.com/widgets.js"></script>`,
|
|
originalURL,
|
|
)
|
|
|
|
title := fmt.Sprintf("@%s", tweet.User.ScreenName)
|
|
if tweet.User.Name != "" {
|
|
title = fmt.Sprintf("%s (@%s)", tweet.User.Name, tweet.User.ScreenName)
|
|
}
|
|
|
|
// Clean up tweet text - remove trailing t.co URLs
|
|
description := tcoRegex.ReplaceAllString(tweet.Text, "")
|
|
description = strings.TrimSpace(description)
|
|
|
|
return &EmbedInfo{
|
|
Provider: ProviderTwitter,
|
|
VideoID: tweetID,
|
|
Title: title,
|
|
Description: description,
|
|
ThumbnailURL: thumbnailURL,
|
|
ThumbnailURLs: thumbnailURLs,
|
|
VideoURL: videoURL,
|
|
EmbedHTML: embedHTML,
|
|
MediaType: mediaType,
|
|
}, nil
|
|
}
|
|
|
|
// oembedResponse holds the subset of an oEmbed JSON document that this
// package consumes.
type oembedResponse struct {
	Title        string `json:"title"`
	Description  string `json:"description"`
	ThumbnailURL string `json:"thumbnail_url"`
}

// fetchOEmbed performs a GET on oembedURL and decodes the JSON body into an
// oembedResponse. The request is bound to ctx and additionally limited to
// ten seconds. An error is returned if the request cannot be built or sent,
// if the status is anything other than 200, or if the body is not valid JSON.
func fetchOEmbed(ctx context.Context, oembedURL string) (*oembedResponse, error) {
	request, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, oembedURL, nil)
	if reqErr != nil {
		return nil, reqErr
	}

	httpClient := http.Client{Timeout: 10 * time.Second}
	response, doErr := httpClient.Do(request)
	if doErr != nil {
		return nil, doErr
	}
	defer response.Body.Close()

	if status := response.StatusCode; status != http.StatusOK {
		return nil, fmt.Errorf("oembed status: %d", status)
	}

	result := new(oembedResponse)
	if decErr := json.NewDecoder(response.Body).Decode(result); decErr != nil {
		return nil, decErr
	}
	return result, nil
}
|
|
|
|
// DownloadThumbnail downloads the thumbnail image for a video.
//
// It issues a GET for thumbnailURL, bound to ctx and limited to thirty
// seconds, and returns the complete response body together with the value of
// the Content-Type response header.
//
// An error is returned when the request cannot be built or sent, when the
// status is not 200, or when the body cannot be read to the end. On error the
// returned data is nil, so a partially downloaded image is never handed back
// as if it were complete.
func DownloadThumbnail(ctx context.Context, thumbnailURL string) ([]byte, string, error) {
	req, err := http.NewRequestWithContext(ctx, http.MethodGet, thumbnailURL, nil)
	if err != nil {
		return nil, "", err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (compatible; Lookbook/1.0)")

	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return nil, "", err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, "", fmt.Errorf("thumbnail status: %d", resp.StatusCode)
	}

	contentType := resp.Header.Get("Content-Type")

	// io.ReadAll treats io.EOF as success and reports every other read error,
	// unlike the previous hand-written loop, which stopped on any error and
	// returned whatever had arrived so far.
	data, err := io.ReadAll(resp.Body)
	if err != nil {
		return nil, "", fmt.Errorf("thumbnail read: %w", err)
	}

	return data, contentType, nil
}
|