import json, re, time, urllib.parse, urllib.request, urllib.error
from collections import Counter, defaultdict
UA={'User-Agent':'HermesResearch/1.0'}
BASE='https://www.reddit.com'; SUB='electricdaisycarnival'

def get(url):
    """GET *url* with the research User-Agent and return the parsed JSON body."""
    request = urllib.request.Request(url, headers=UA)
    with urllib.request.urlopen(request, timeout=20) as resp:
        raw = resp.read()
    return json.loads(raw.decode())

def listing(path, limit=50):
    """Fetch a Reddit listing endpoint and return the post payloads.

    Appends limit/raw_json query parameters (with '&' vs '?' chosen by
    whether *path* already has a query string) and keeps only 't3'
    (link/post) children, unwrapped to their 'data' dicts.
    """
    sep = '&' if '?' in path else '?'
    qs = urllib.parse.urlencode({'limit': limit, 'raw_json': 1})
    payload = get(BASE + path + sep + qs)
    children = payload['data'].get('children', [])
    return [child['data'] for child in children if child.get('kind') == 't3']

# Collect candidate posts: generic hot/top listings plus targeted searches
# for packing/logistics questions.
posts=[]
paths=[f'/r/{SUB}/hot.json', f'/r/{SUB}/top.json?t=month', f'/r/{SUB}/top.json?t=year']
search_terms = ['what to bring','packing list','essentials','first time tips','hydration pack','earplugs','portable charger','bag policy','locker','shoes','weather cold','security allowed items']
for q in search_terms:
    paths.append(f'/r/{SUB}/search.json?q={urllib.parse.quote(q)}&restrict_sr=on&sort=relevance&t=year')
for path in paths:
    try:
        posts += listing(path, 50)
        time.sleep(.25)  # stay polite to reddit's unauthenticated rate limit
    except Exception as e:
        print('ERR',path,type(e).__name__,e)
# De-duplicate by post id, keeping the first occurrence of each.
by_id = {}
for p in posts:
    by_id.setdefault(p['id'], p)
posts = list(by_id.values())
# Regex marking a post as packing/logistics-related; reused later to filter
# posts and comments for the report.
keywords=re.compile(r'bring|packing|pack|essentials|first time|tip|allowed|prohibited|hydration|camelbak|shoes|earplug|charger|locker|bag|security|weather|cold|hot|shuttle|camp|kandi|gum|vicks|totem', re.I)

def _rank(post):
    """Ranking score: keyword hit worth 10, plus comments*2 and score/20."""
    blob = post.get('title','') + ' ' + (post.get('selftext') or '')
    bonus = 10 if keywords.search(blob) else 0
    return bonus + post.get('num_comments',0)*2 + post.get('score',0)/20

# Keep the 30 most promising posts for the (slow) comment fetch pass.
sample = sorted(posts, key=_rank, reverse=True)[:30]
def _walk_comments(children, out):
    """Recursively collect comment bodies from a Reddit comment tree.

    Only 't1' (comment) nodes are kept; deleted/removed bodies are skipped.
    Each kept comment is appended to *out* as {'body': str, 'score': int}.
    """
    for c in children:
        if c.get('kind') != 't1':
            continue
        d = c['data']
        body = d.get('body', '')
        if body and body not in ('[deleted]', '[removed]'):
            out.append({'body': body, 'score': d.get('score', 0)})
        rep = d.get('replies')
        # Reddit sends '' (empty string) instead of a dict when a comment has
        # no replies, hence the isinstance check.
        if isinstance(rep, dict):
            _walk_comments(rep['data'].get('children', []), out)

# Fetch top comments for each sampled post. The tree walker is defined once
# above instead of being re-created as a closure on every loop iteration.
comments_by_id = {}
for p in sample:
    try:
        data = get(BASE + p['permalink'].rstrip() + '.json?limit=50&sort=top&raw_json=1')
        comm = []
        # data[0] is the post listing itself; data[1] holds the comment tree.
        if len(data) > 1:
            _walk_comments(data[1]['data'].get('children', []), comm)
        comments_by_id[p['id']] = comm[:50]
        time.sleep(.25)  # stay polite to reddit's unauthenticated rate limit
    except Exception as e:
        print('comment err', p['id'], type(e).__name__, e)
# Category name -> regex (applied case-insensitively) used to tag each post's
# combined title/selftext/comment text. Categories deliberately overlap
# (e.g. 'camelbak' counts as both hydration and bag) — counts below are
# "posts mentioning", not exclusive labels. Every alternative is wrapped in
# \b...\b so short tokens like 'id' and 'tp' only match as whole words.
patterns={
 'hydration pack / water bottle': r'\b(hydration|camelbak|hydropack|water pack|water bottle|refill|bladder)\b',
 'comfortable shoes / insoles': r'\b(shoes|sneakers|boots|insoles|feet|foot|blister|comfortable)\b',
 'earplugs': r'\b(earplugs?|ear plugs?|ear protection|earpeace|loops?|eargasm|hearing)\b',
 'portable charger / battery': r'\b(portable charger|power bank|battery pack|charger|charging)\b',
 'ID / wristband / ticket': r'\b(id|wristband|ticket|pass|passport|license)\b',
 'phone protection / tether': r'\b(phone|iphone|android|case|lanyard|tether|anti[- ]?theft|clutchloop)\b',
 'locker': r'\b(locker|lockers)\b',
 'jacket / hoodie / pashmina / layers': r'\b(jacket|hoodie|sweater|layers?|cold|windbreaker|pashmina)\b',
 'sun protection / fan / cooling towel': r'\b(sunscreen|sunblock|spf|sunglasses|hat|fan|cooling towel)\b',
 'wet wipes / tissues / sanitizer': r'\b(wet wipes|baby wipes|wipes|tissues|toilet paper|tp|sanitizer)\b',
 'gum / mints / candy': r'\b(gum|mints?|hi[- ]?chew|candy)\b',
 'chapstick / lip balm': r'\b(chapstick|lip balm|aquaphor|vaseline)\b',
 'meds / first aid / blister care': r'\b(meds|medicine|ibuprofen|advil|tylenol|allergy|band[- ]?aid|bandaid|blister|moleskin|narcan)\b',
 'clear bag / fanny pack': r'\b(clear bag|bag policy|fanny pack|sling|backpack|purse|camelbak|hydration pack)\b',
 'kandi / trinkets': r'\b(kandi|trinkets?|sprouts?|stickers?|trades?|bracelets?)\b',
 'totem / meetup marker': r'\b(totem|flag|meetup|marker|find friends|group)\b',
 'cash / cards': r'\b(cash|card|credit|debit|apple pay|wallet)\b',
 'mask / bandana / dust': r'\b(mask|bandana|dust|face cover|pashmina)\b',
 'deodorant / personal care': r'\b(deodorant|perfume|cologne|hair tie|tampons?|pads?)\b',
 'snacks': r'\b(snacks?|protein bar|granola|food)\b',
}
# Tally, per category, how many posts (title + selftext + sampled comments)
# match the category regex, keeping up to 4 example posts as evidence.
cat = Counter()
evidence = defaultdict(list)
# Compile each pattern once up front instead of paying the re-cache lookup
# and flag handling on every post x pattern iteration.
compiled_patterns = {name: re.compile(pat, re.I) for name, pat in patterns.items()}
for p in posts:
    text = '\n'.join([p.get('title', ''), p.get('selftext') or '']
                     + [c['body'] for c in comments_by_id.get(p['id'], [])])
    for name, rx in compiled_patterns.items():
        if rx.search(text):
            cat[name] += 1
            if len(evidence[name]) < 4:
                evidence[name].append({
                    'title': p['title'],
                    'score': p.get('score'),
                    'comments': p.get('num_comments'),
                    'url': 'https://reddit.com' + p['permalink'],
                    'snippet': (p.get('selftext') or p.get('title') or '')[:600],
                })
# Posts whose title/selftext hit the packing-keyword regex at all.
relevant = [p for p in posts if keywords.search(p.get('title', '') + ' ' + (p.get('selftext') or ''))]
# Keyword-matching comments, annotated with their parent post. Index posts by
# id once (O(n)) instead of scanning the whole post list per sampled post.
posts_by_id = {p['id']: p for p in posts}
comments = []
for pid, cs in comments_by_id.items():
    post = posts_by_id.get(pid)
    for c in cs:
        if keywords.search(c['body']):
            comments.append({
                'post_title': post['title'] if post else pid,
                'score': c['score'],
                'body': c['body'][:700],
                'url': 'https://reddit.com' + post['permalink'] if post else '',
            })
# Assemble the final report: category tallies with evidence, the most
# engaged keyword-relevant posts, and the highest-scored matching comments.
report = {
    'subreddit': SUB,
    'post_count': len(posts),
    'comment_posts_sampled': len(comments_by_id),
    'category_counts': cat.most_common(),
    'evidence': evidence,
    'top_relevant_posts': [
        {
            'title': p['title'],
            'score': p.get('score'),
            'comments': p.get('num_comments'),
            'url': 'https://reddit.com' + p['permalink'],
            'selftext': (p.get('selftext') or '')[:900],
        }
        for p in sorted(relevant, key=lambda p: p.get('num_comments', 0) * 3 + p.get('score', 0), reverse=True)[:30]
    ],
    'high_comment_snippets': sorted(comments, key=lambda x: x['score'], reverse=True)[:50],
}
path = '/Users/cynthia/.hermes/hermes-agent/edc_festival_reddit_report.json'
# Use a context manager (the original leaked the file handle) and an explicit
# encoding so output is UTF-8 regardless of locale.
with open(path, 'w', encoding='utf-8') as f:
    json.dump(report, f, indent=2)
print(path)
print('posts', len(posts), 'comment_posts', len(comments_by_id))
for k, v in cat.most_common():
    print(f'{k}: {v} ({v/len(posts):.1%})')
