Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions agent_reach/channels/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from .exa_search import ExaSearchChannel
from .xiaohongshu import XiaoHongShuChannel
from .douyin import DouyinChannel
from .facebook import FacebookChannel
from .linkedin import LinkedInChannel
from .wechat import WeChatChannel
from .weibo import WeiboChannel
Expand All @@ -33,6 +34,7 @@
BilibiliChannel(),
XiaoHongShuChannel(),
DouyinChannel(),
FacebookChannel(),
LinkedInChannel(),
WeChatChannel(),
WeiboChannel(),
Expand Down
62 changes: 62 additions & 0 deletions agent_reach/channels/facebook.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# -*- coding: utf-8 -*-
"""Facebook — public videos via yt-dlp, text posts via Jina Reader."""

import shutil
from urllib.parse import urlparse

from .base import Channel


class FacebookChannel(Channel):
    """Channel descriptor for public Facebook videos and posts.

    - Public videos: intended to be extracted via yt-dlp
    - Text posts / articles: intended to be fetched via Jina Reader
      (no installation required)

    Only URL matching and the backend health check are implemented here;
    ``read`` and ``search`` raise ``NotImplementedError``.
    """

    name = "facebook"
    description = "Facebook 公开帖子和视频"
    backends = ["yt-dlp", "Jina Reader"]
    tier = 0

    # Domains this channel accepts. A host matches when it is exactly one of
    # these or a dot-delimited subdomain of one (www., m., web., ...).
    _DOMAINS = ("facebook.com", "fb.com", "fb.watch")

    def can_handle(self, url: str) -> bool:
        """Return True when *url* points at a Facebook-owned host.

        Accepts facebook.com, fb.com and fb.watch plus any of their
        subdomains (e.g. www.facebook.com, m.facebook.com).

        The match is made on the parsed hostname, not on a substring of the
        network location, so look-alike or spoofed hosts such as
        ``notfacebook.com``, ``facebook.com.evil.example`` or
        ``https://facebook.com@evil.example/`` are rejected.

        Args:
            url: Absolute URL to test. URLs without a scheme have no
                hostname and are therefore rejected.

        Returns:
            True if the hostname belongs to one of the accepted domains,
            False otherwise (including for malformed URLs).
        """
        try:
            host = urlparse(url).hostname
        except ValueError:
            # urlparse rejects some malformed inputs (e.g. a broken IPv6
            # literal); such a URL is simply not ours.
            return False
        if not host:
            return False
        # hostname is already lower-cased; drop a trailing root dot so that
        # "facebook.com." is treated the same as "facebook.com".
        host = host.rstrip(".")
        return any(
            host == domain or host.endswith("." + domain)
            for domain in self._DOMAINS
        )

    def check(self, config=None):
        """Report which backends are available.

        Args:
            config: Accepted for interface compatibility; currently unused.

        Returns:
            A ``(status, message)`` tuple:

            - ``('ok', msg)``   — yt-dlp found on PATH; video and text support.
            - ``('warn', msg)`` — yt-dlp missing; text posts still work via
              Jina Reader.

            ``'off'`` is never returned by this implementation.
        """
        has_ytdlp = bool(shutil.which("yt-dlp"))
        if has_ytdlp:
            return "ok", "可提取公开视频元数据;文字帖子可通过 Jina Reader 读取"
        return "warn", (
            "yt-dlp 未安装,视频提取不可用。安装:pip install yt-dlp\n"
            " 文字帖子仍可通过 Jina Reader 读取。"
        )

    def read(self, url: str) -> str:
        """Fetch content from a Facebook URL (not implemented yet).

        The message of the raised error tells callers which tool to use
        for videos and for text posts in the meantime.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Direct read() not yet implemented. "
            "For videos call yt-dlp directly; for text posts use Jina Reader at "
            "https://r.jina.ai/<url>."
        )

    def search(self, query: str) -> str:
        """Search Facebook content (unsupported).

        Raises:
            NotImplementedError: Always; the error message states that no
                public search API is available.
        """
        raise NotImplementedError(
            "Facebook search is not supported — no public search API available."
        )
184 changes: 141 additions & 43 deletions tests/test_channels.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from urllib.error import URLError

from agent_reach.channels import get_all_channels, get_channel
from agent_reach.channels.facebook import FacebookChannel
from agent_reach.channels.v2ex import V2EXChannel
from agent_reach.channels.xiaohongshu import XiaoHongShuChannel
from agent_reach.channels.xueqiu import XueqiuChannel
Expand Down Expand Up @@ -127,15 +128,27 @@ def test_get_hot_topics_respects_limit(self, monkeypatch):
import urllib.request

fake_data = [
{"id": i, "title": f"Topic {i}", "url": f"https://v2ex.com/t/{i}", "replies": i,
"content": "", "created": 1700000000 + i, "node": {"name": "tech", "title": "Tech"}}
{
"id": i,
"title": f"Topic {i}",
"url": f"https://v2ex.com/t/{i}",
"replies": i,
"content": "",
"created": 1700000000 + i,
"node": {"name": "tech", "title": "Tech"},
}
for i in range(10)
]

class FakeResponse:
def __enter__(self): return self
def __exit__(self, *_): pass
def read(self): return json.dumps(fake_data).encode()
def __enter__(self):
return self

def __exit__(self, *_):
pass

def read(self):
return json.dumps(fake_data).encode()

monkeypatch.setattr(urllib.request, "urlopen", lambda req, timeout=None: FakeResponse())
topics = V2EXChannel().get_hot_topics(limit=3)
Expand All @@ -146,14 +159,26 @@ def test_get_hot_topics_truncates_content(self, monkeypatch):

long_content = "A" * 300
fake_data = [
{"id": 1, "title": "Long post", "url": "https://v2ex.com/t/1", "replies": 0,
"content": long_content, "created": 1700000000, "node": {"name": "tech", "title": "Tech"}}
{
"id": 1,
"title": "Long post",
"url": "https://v2ex.com/t/1",
"replies": 0,
"content": long_content,
"created": 1700000000,
"node": {"name": "tech", "title": "Tech"},
}
]

class FakeResponse:
def __enter__(self): return self
def __exit__(self, *_): pass
def read(self): return json.dumps(fake_data).encode()
def __enter__(self):
return self

def __exit__(self, *_):
pass

def read(self):
return json.dumps(fake_data).encode()

monkeypatch.setattr(urllib.request, "urlopen", lambda req, timeout=None: FakeResponse())
topics = V2EXChannel().get_hot_topics(limit=1)
Expand All @@ -179,9 +204,14 @@ def test_get_node_topics(self, monkeypatch):
]

class FakeResponse:
def __enter__(self): return self
def __exit__(self, *_): pass
def read(self): return json.dumps(fake_data).encode()
def __enter__(self):
return self

def __exit__(self, *_):
pass

def read(self):
return json.dumps(fake_data).encode()

monkeypatch.setattr(urllib.request, "urlopen", lambda req, timeout=None: FakeResponse())
topics = V2EXChannel().get_node_topics("python")
Expand Down Expand Up @@ -227,9 +257,14 @@ class FakeResponse:
def __init__(self, payload):
self._payload = payload

def __enter__(self): return self
def __exit__(self, *_): pass
def read(self): return json.dumps(self._payload).encode()
def __enter__(self):
return self

def __exit__(self, *_):
pass

def read(self):
return json.dumps(self._payload).encode()

def fake_urlopen(req, timeout=None):
url = req.full_url
Expand Down Expand Up @@ -265,10 +300,17 @@ def test_get_topic_handles_empty_replies(self, monkeypatch):
]

class FakeResponse:
def __init__(self, payload): self._payload = payload
def __enter__(self): return self
def __exit__(self, *_): pass
def read(self): return json.dumps(self._payload).encode()
def __init__(self, payload):
self._payload = payload

def __enter__(self):
return self

def __exit__(self, *_):
pass

def read(self):
return json.dumps(self._payload).encode()

def fake_urlopen(req, timeout=None):
if "replies" in req.full_url:
Expand Down Expand Up @@ -302,9 +344,14 @@ def test_get_user_returns_profile(self, monkeypatch):
}

class FakeResponse:
def __enter__(self): return self
def __exit__(self, *_): pass
def read(self): return json.dumps(fake_user).encode()
def __enter__(self):
return self

def __exit__(self, *_):
pass

def read(self):
return json.dumps(fake_user).encode()

monkeypatch.setattr(urllib.request, "urlopen", lambda req, timeout=None: FakeResponse())
user = V2EXChannel().get_user("alice")
Expand Down Expand Up @@ -342,9 +389,7 @@ def test_check_ok_when_api_reachable(self, monkeypatch):

fake_response_data = {
"data": {
"items": [
{"quote": {"symbol": "SH000001", "name": "上证指数", "current": 3200.0}}
]
"items": [{"quote": {"symbol": "SH000001", "name": "上证指数", "current": 3200.0}}]
}
}

Expand Down Expand Up @@ -485,7 +530,14 @@ def make_item(id_, title, text, author, likes, target):

fake_data = {
"list": [
make_item(111, "市场分析", "<p>今天大盘走势&amp;分析</p>", "投资者A", 42, "/1234567890/111"),
make_item(
111,
"市场分析",
"<p>今天大盘走势&amp;分析</p>",
"投资者A",
42,
"/1234567890/111",
),
make_item(222, "", "短评", "投资者B", 10, "/9876543210/222"),
]
}
Expand Down Expand Up @@ -518,14 +570,16 @@ def test_get_hot_posts_respects_limit(self, monkeypatch):
fake_data = {
"list": [
{
"data": json.dumps({
"id": i,
"title": f"Post {i}",
"text": f"Content {i}",
"user": {"screen_name": f"User {i}"},
"like_count": i,
"target": f"/user/{i}",
}),
"data": json.dumps(
{
"id": i,
"title": f"Post {i}",
"text": f"Content {i}",
"user": {"screen_name": f"User {i}"},
"like_count": i,
"target": f"/user/{i}",
}
),
"original_status": None,
}
for i in range(10)
Expand Down Expand Up @@ -601,18 +655,24 @@ def get(self, key, default=None):
return default

import agent_reach.channels.xueqiu as xq_mod

monkeypatch.setattr(
xq_mod,
"_load_cookies_from_config",
lambda: (xq_mod._inject_cookie_string("xq_a_token=TESTTOKEN; xq_is_login=1") or True),
lambda: xq_mod._inject_cookie_string("xq_a_token=TESTTOKEN; xq_is_login=1") or True,
)
monkeypatch.setattr(xq_mod, "_load_cookies_from_browser", lambda: False)

# Patch opener so no real HTTP call is made
class FakeResp:
def __enter__(self): return self
def __exit__(self, *_): pass
def read(self): return b'{"data":{"items":[]}}'
def __enter__(self):
return self

def __exit__(self, *_):
pass

def read(self):
return b'{"data":{"items":[]}}'

monkeypatch.setattr(xq_mod._opener, "open", lambda req, timeout=None: FakeResp())

Expand All @@ -629,9 +689,14 @@ def test_get_json_sends_referer_and_browser_ua(self, monkeypatch):
captured = {}

class FakeResp:
def __enter__(self): return self
def __exit__(self, *_): pass
def read(self): return b'{"data":{"items":[]}}'
def __enter__(self):
return self

def __exit__(self, *_):
pass

def read(self):
return b'{"data":{"items":[]}}'

def fake_open(req, timeout=None):
captured["ua"] = req.get_header("User-agent")
Expand Down Expand Up @@ -729,7 +794,9 @@ def test_reports_warn_when_not_authenticated(self, monkeypatch):
monkeypatch.setattr(shutil, "which", lambda _: "/usr/local/bin/xhs")

def fake_run(cmd, **kwargs):
return subprocess.CompletedProcess(cmd, 1, "", "ok: false\nerror:\n code: not_authenticated\n")
return subprocess.CompletedProcess(
cmd, 1, "", "ok: false\nerror:\n code: not_authenticated\n"
)

monkeypatch.setattr(subprocess, "run", fake_run)

Expand All @@ -742,3 +809,34 @@ def test_reports_off_when_not_installed(self, monkeypatch):
status, msg = XiaoHongShuChannel().check()
assert status == "off"
assert "xiaohongshu-cli" in msg


class TestFacebookChannel:
    """Tests for FacebookChannel: URL matching, health check, registration."""

    def test_can_handle_facebook_urls(self):
        """Facebook-owned hosts are accepted; other social sites are not."""
        channel = FacebookChannel()
        accepted = (
            "https://www.facebook.com/video/12345",
            "https://facebook.com/groups/python/posts/1",
            "https://m.facebook.com/story.php?id=1234",
            "https://fb.com/video/12345",
            "https://fb.watch/abc123/",
        )
        rejected = (
            "https://instagram.com/p/abc/",
            "https://twitter.com/user",
        )
        for link in accepted:
            assert channel.can_handle(link)
        for link in rejected:
            assert not channel.can_handle(link)

    def test_check_ok_when_ytdlp_installed(self, monkeypatch):
        """With yt-dlp on PATH the channel reports 'ok'."""

        def fake_which(cmd):
            # Pretend only yt-dlp is installed.
            if cmd == "yt-dlp":
                return "/usr/local/bin/yt-dlp"
            return None

        monkeypatch.setattr(shutil, "which", fake_which)
        result = FacebookChannel().check()
        status, msg = result
        assert status == "ok"
        assert "视频" in msg or "Jina" in msg

    def test_check_warn_when_ytdlp_missing(self, monkeypatch):
        """Without yt-dlp the channel reports 'warn' and names both backends."""

        def nothing_installed(_):
            return None

        monkeypatch.setattr(shutil, "which", nothing_installed)
        status, msg = FacebookChannel().check()
        assert status == "warn"
        for expected in ("yt-dlp", "Jina"):
            assert expected in msg

    def test_registered_in_all_channels(self):
        """The channel is present in the global channel registry."""
        assert any(ch.name == "facebook" for ch in get_all_channels())
Loading